sample.biom.pr2 <- paste0(devdir, "data/18s.emu.all_json_md.biom")
sample.tree.pr2 <- paste0(devdir, "data/18s.emu.tree.nwk")
biom.pr2 <- import_biom(sample.biom.pr2, treefilename = sample.tree.pr2)
colnames(tax_table(biom.pr2)) <- c("Domain", "Subdivision", "Class", "Order", "Family", "Genus", "Species")
sampleid.pr2 <- sample_names(biom.pr2)
labels.biom.pr2 <- sample_data(biom.pr2)$Label
rank_names(biom.pr2)
## [1] "Domain" "Subdivision" "Class" "Order" "Family"
## [6] "Genus" "Species"
sample_variables(biom.pr2)
## [1] "BarcodeSequence" "Culture_setup" "Inoculum" "Label"
## [5] "PCR_amplicon" "Primer" "ProjectID" "Time_point"
sample_names(biom.pr2)
## [1] "flaregas-1" "flaregas-2" "flaregas-3" "flaregas-4" "flaregas-5"
## [6] "flaregas-6" "flaregas-7" "flaregas-8" "flaregas-9" "flaregas-10"
## [11] "flaregas-11" "flaregas-12" "flaregas-13" "flaregas-14" "flaregas-15"
## [16] "flaregas-16" "flaregas-17" "flaregas-18" "flaregas-19" "flaregas-20"
## [21] "flaregas-21" "flaregas-22" "flaregas-23" "flaregas-24" "flaregas-25"
## [26] "flaregas-26" "flaregas-27" "flaregas-28" "flaregas-29" "flaregas-30"
knitr::kable(as.matrix(sample_data(biom.pr2)))
| BarcodeSequence | Culture_setup | Inoculum | Label | PCR_amplicon | Primer | ProjectID | Time_point | |
|---|---|---|---|---|---|---|---|---|
| flaregas-1 | BC01 | ocean | Helsingor | H-LC-t0 | 16S | 27F+1492R | FAS64881 | 0 |
| flaregas-2 | BC02 | light+clean_gas | Helsingor | H-LC-t1 | 16S | 27F+1492R | FAS64881 | 7 |
| flaregas-3 | BC03 | light+clean_gas | Helsingor | H-LC-t3 | 16S | 27F+1492R | FAS64881 | 14 |
| flaregas-4 | BC04 | ocean | Helsingor | H-LC-t0 | 18S | ss5+ss3 | FAS64881 | 0 |
| flaregas-5 | BC05 | light+clean_gas | Helsingor | H-LC-t1 | 18S | ss5+ss3 | FAS64881 | 7 |
| flaregas-6 | BC06 | light+clean_gas | Helsingor | H-LC-t3 | 18S | ss5+ss3 | FAS64881 | 14 |
| flaregas-7 | BC07 | ocean | Helsingor | H-LC-t0 | 18S | EukA+EukB | FAS64881 | 0 |
| flaregas-8 | BC08 | light+clean_gas | Helsingor | H-LC-t1 | 18S | EukA+EukB | FAS64881 | 7 |
| flaregas-9 | BC09 | light+clean_gas | Helsingor | H-LC-t3 | 18S | EukA+EukB | FAS64881 | 14 |
| flaregas-10 | BC10 | ocean | Helsingor | H-LC-t0 | 16S+18S | 27F+1492R++EukA+EukB | FAS64881 | 0 |
| flaregas-11 | BC01 | ocean | Hornbaek-deep | Deep-DC-t0 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 0 |
| flaregas-12 | BC02 | ocean | Hornbaek-deep | Deep-DC-t0 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 0 |
| flaregas-13 | BC03 | light+clean_gas | Hornbaek-deep | Deep-LC-t1 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 5 |
| flaregas-14 | BC04 | light+clean_gas | Hornbaek-deep | Deep-LC-t2 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 7 |
| flaregas-15 | BC05 | light+clean_gas | Hornbaek-deep | Deep-LC-t3 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 11 |
| flaregas-16 | BC06 | light+clean_gas | Hornbaek-deep | Deep-LC-t3 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 11 |
| flaregas-17 | BC07 | light+used_gas | Hornbaek-deep | Deep-LU-t1 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 5 |
| flaregas-18 | BC08 | light+used_gas | Hornbaek-deep | Deep-LU-t2 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 7 |
| flaregas-19 | BC09 | light+used_gas | Hornbaek-deep | Deep-LU-t3 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 11 |
| flaregas-20 | BC10 | light+used_gas | Hornbaek-deep | Deep-LU-t3 | 16S+18S | 27F+1492R++EukA+EukB | FAS65981 | 11 |
| flaregas-21 | BC11 | dark+clean_gas | Hornbaek-deep | Deep-DC-t1 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 5 |
| flaregas-22 | BC12 | dark+clean_gas | Hornbaek-deep | Deep-DC-t2 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 7 |
| flaregas-23 | BC13 | dark+clean_gas | Hornbaek-deep | Deep-DC-t3 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 11 |
| flaregas-24 | BC14 | dark+clean_gas | Hornbaek-deep | Deep-DC-t3 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 11 |
| flaregas-25 | BC15 | ocean | Hornbaek-shallow | Shallow-DC-t0 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 0 |
| flaregas-26 | BC16 | ocean | Hornbaek-shallow | Shallow-DC-t0 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 0 |
| flaregas-27 | BC17 | dark+clean_gas | Hornbaek-shallow | Shallow-DC-t1 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 5 |
| flaregas-28 | BC18 | dark+clean_gas | Hornbaek-shallow | Shallow-DC-t2 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 7 |
| flaregas-29 | BC19 | dark+clean_gas | Hornbaek-shallow | Shallow-DC-t3 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 11 |
| flaregas-30 | BC20 | dark+clean_gas | Hornbaek-shallow | Shallow-DC-t3 | 16S+18S | 27F+1492R++EukA+EukB | FAS64830 | 11 |
Subset eukaryotic microbes (18S)
#remove Mitochondria (Eukaryota:mito::kingdom level), Metazoa, Ichthyosporea, Choanoflagellata, and Fungi (all phylum level)
biom.filter.pr2 <- subset_taxa(biom.pr2, Domain!="Eukaryota:mito")
biom.filter.pr2 <- subset_taxa(biom.filter.pr2, Subdivision!="Metazoa")
biom.filter.pr2 <- subset_taxa(biom.filter.pr2, Subdivision!="Ichthyosporea")
biom.filter.pr2 <- subset_taxa(biom.filter.pr2, Subdivision!="Choanoflagellata")
biom.filter.pr2 <- subset_taxa(biom.filter.pr2, Subdivision!="Fungi")
#get eukaryotes (18S)
biom.18s <- subset_taxa(biom.filter.pr2, Domain=="Eukaryota")
#remove 16s specific samples
biom.18s <- prune_samples(!(sample_names(biom.18s)) %in% c("flaregas-1","flaregas-2","flaregas-3"), biom.18s)
sampleid.18s <- sample_names(biom.18s)
label.18s <- sample_data(biom.18s)$Label
meta <- rownames_to_column(as.data.frame(as.matrix(biom.18s@sam_data))) %>% dplyr::rename("Sample"="rowname")
##convert counts to integers for alpha diversity measures
biom.18s.int <- biom.18s
otu_table(biom.18s.int) <- otu_table(matrix(as.integer(otu_table(biom.18s)), ncol = nsamples(biom.18s), dimnames = list(taxa_names(biom.18s), sample_names(biom.18s))), taxa_are_rows = taxa_are_rows(biom.18s))
Remove all taxa that only occur in a single sample
biom.18s
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1472 taxa and 27 samples ]
## sample_data() Sample Data: [ 27 samples by 8 sample variables ]
## tax_table() Taxonomy Table: [ 1472 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1472 tips and 1424 internal nodes ]
#tax_stats(biom.18s)
# Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(biom.18s),
MARGIN = ifelse(taxa_are_rows(biom.18s), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
prevdf = data.frame(Prevalence = prev0, TotalAbundance = taxa_sums(biom.18s), tax_table(biom.18s))
ggplot(prevdf, aes(Prevalence)) + geom_bar()
# Define prevalence threshold as 5% of total samples
prevalenceThreshold = 0.05 * nsamples(biom.18s)
prevalenceThreshold
## [1] 1.35
# Execute prevalence filter, using `prune_taxa()` function
biom.18s = prune_taxa((prev0 > prevalenceThreshold), biom.18s)
biom.18s
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1017 taxa and 27 samples ]
## sample_data() Sample Data: [ 27 samples by 8 sample variables ]
## tax_table() Taxonomy Table: [ 1017 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1017 tips and 990 internal nodes ]
#tax_stats(biom.18s)
Relative abundance
# Transform to relative abundance. Save as new object.
biom.18s.rel <- biom.18s %>% transform_sample_counts(function(x) {x / sum(x)})
#alternatively run: biom.18s.rel <- microbiome::transform(biom.18s, "compositional")
# visualize relative abundance distribution
d.rel.melt <- melt(otu_table(biom.18s.rel), id.vars = colnames(otu_table(biom.18s.rel))) %>% dplyr::rename("Sample" = "Var2", "Relative abundance" = "value")
d.rel.melt <- inner_join(d.rel.melt, meta, by = "Sample")
pdf(paste0(wdir,"fig/rel_abundance-distr-18s.pdf"), width = 14, height = 10)
ggplot(d.rel.melt, aes(`Relative abundance`, Sample, fill=Inoculum)) + geom_boxplot(outliers = F) + coord_flip() + facet_wrap(~ Culture_setup, ncol = 2, scales = "free_x") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
dev.off()
## png
## 2
Centered log-ratio transformation
# load the library zCompositions to perform 0 replacement
library(zCompositions)
# we are using the Count Zero Multiplicative approach
# z.warning is threshold used to identify individual rows or columns including an excess of zeros/unobserved values
d.n0 <- cmultRepl(otu_table(biom.18s), method="CZM", label=0, z.warning=0.99)
## No. adjusted imputations: 18593
dim(d.n0)
## [1] 1017 27
# generate the centered log-ratio transformed data
# samples by row
d.clr <- apply(d.n0, 2, function(x) log(x) - mean(log(x)))
biom.18s.clr <- biom.18s
otu_table(biom.18s.clr) <- otu_table(d.clr, taxa_are_rows = TRUE)
# check if microbiome::transform gives us the same
#pseq.comp <- microbiome::transform(biom.18s, "clr")
#d.clr.melt <- melt(otu_table(pseq.comp), id.vars = colnames(otu_table(pseq.comp))) %>% dplyr::rename("Sample" = "Var2", "clr" = "value")
# visualize clr distribution
d.clr.melt <- melt(otu_table(biom.18s.clr), id.vars = colnames(otu_table(biom.18s.clr))) %>% dplyr::rename("Sample" = "Var2", "clr" = "value")
d.clr.melt <- inner_join(d.clr.melt, meta, by = "Sample")
pdf(paste0(wdir,"fig/clr-distr-18s.pdf"), width = 14, height = 10)
ggplot(d.clr.melt, aes(clr, Sample, fill=Inoculum)) + geom_boxplot(outliers = F) + coord_flip() + facet_wrap(~ Culture_setup, ncol = 2, scales = "free_x") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
dev.off()
## png
## 2
Maximal relative abundances (>3%) for each Culture setup aggregated for each genus
temp.18s <- merge_samples(biom.18s, "Culture_setup") %>% tax_glom(taxrank = "Genus") %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt() %>% filter(Abundance>0.03) %>% arrange(Genus) %>% dplyr::arrange(desc(Abundance))
knitr::kable(as.matrix(temp.18s[,c(2,3,13:17)]))
| Sample | Abundance | Subdivision | Class | Order | Family | Genus |
|---|---|---|---|---|---|---|
| light+clean_gas | 0.44084499 | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Picochlorum |
| light+used_gas | 0.39396842 | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Picochlorum |
| light+used_gas | 0.22031143 | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Nannochloris |
| light+clean_gas | 0.19163245 | Chlorophyta_X | Chlorophyceae | Chlamydomonadales | Chlamydomonadales_X | Chlamydomonas |
| dark+clean_gas | 0.18925261 | Ciliophora | Spirotrichea | Euplotia | Aspidiscidae | Aspidisca |
| dark+clean_gas | 0.17843859 | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Picochlorum |
| light+clean_gas | 0.16703950 | Chlorophyta_X | Chlorodendrophyceae | Chlorodendrales | Chlorodendraceae | Tetraselmis |
| dark+clean_gas | 0.15025249 | Ciliophora | Spirotrichea | Choreotrichida | Strobilidiidae | Rimostrombidium |
| light+used_gas | 0.13584400 | Chlorophyta_X | Chlorodendrophyceae | Chlorodendrales | Chlorodendraceae | Tetraselmis |
| ocean | 0.12174715 | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Picochlorum |
| light+used_gas | 0.11292061 | Chlorophyta_X | Chlorophyceae | Chlamydomonadales | Chlamydomonadales_X | Chlamydomonadales_XX |
| ocean | 0.07339914 | Dinoflagellata | Dinophyceae | Peridiniales | Heterocapsaceae | Heterocapsa |
| ocean | 0.07308266 | Ciliophora | Phyllopharyngea | Cyrtophoria_5 | Chlamydodontidae | Chlamydodon |
| dark+clean_gas | 0.06198952 | Chlorophyta_X | Chlorophyceae | Chlamydomonadales | Chlamydomonadales_X | Chlamydomonas |
| ocean | 0.05883358 | Dinoflagellata | Dinophyceae | Gymnodiniales | Gymnodiniaceae | Lepidodinium |
| dark+clean_gas | 0.05873322 | Gyrista | Chrysophyceae | Ochromonadales | Ochromonadaceae | Spumella |
| light+clean_gas | 0.05840299 | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Nannochloris |
| dark+clean_gas | 0.05690629 | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Nannochloris |
| dark+clean_gas | 0.04818719 | Ciliophora | Oligohymenophorea | Scuticociliatia_1 | Philasterida | Philasterida_X |
| light+clean_gas | 0.04769889 | Bigyra | Bicoecea | Bicoecea_X | Bicoecea_XX | Bicoecea_XXX |
| dark+clean_gas | 0.04372455 | Gyrista | Chrysophyceae | Paraphysomonadales | Paraphysomonadaceae | Paraphysomonas |
| light+used_gas | 0.04206976 | Bigyra | Bicoecea | Bicoecea_X | Bicoecea_XX | Bicoecea_XXX |
| ocean | 0.03399117 | Ciliophora | Spirotrichea | Choreotrichida | Strobilidiidae | Rimostrombidium |
| dark+clean_gas | 0.03197411 | Cercozoa | Filosa-Granofilosea | Cryptofilida | Novel-Gran-2 | Novel-Gran-2_X |
| dark+clean_gas | 0.03153655 | Bigyra | Bicoecea | Anoecales | Caecitellaceae | Caecitellus |
| ocean | 0.03115222 | Dinoflagellata | Syndiniales | Dino-Group-II | Dino-Group-II-Clade-2 | Dino-Group-II-Clade-2_X |
| light+used_gas | 0.03114172 | Chlorophyta_X | Chlorodendrophyceae | Chlorodendrales | Chlorodendraceae | Chlorodendrales_XX |
set.seed(1)
#https://microbiome.github.io/tutorials/DMM.html
library(DirichletMultinomial)
# prefilter relative abundances for prevalent taxa
biom.18s.rel.genus <- biom.18s.rel %>% tax_glom(taxrank = "Genus")
biom.18s.genus <- biom.18s %>% tax_glom(taxrank = "Genus")
taxa <- core_members(biom.18s.rel.genus, detection = 1/200, prevalence = 2/27) #table(meta$Culture_setup) -> minimum no. samples of ocean~Hornbaek-deep: 6
pseq <- prune_taxa(taxa, biom.18s.genus)
dat <- abundances(pseq)
#rownames(dat) <- paste(tax_table(pseq)[,"Genus"], tax_table(pseq)[,"Species"], sep = "::")
rownames(dat) <- tax_table(pseq)[,"Genus"]
count <- as.matrix(t(dat))
# fit the DMM model
fit <- lapply(1:7, dmn, count = count, verbose=TRUE, seed = 1234567)
## dmn, k=1
## Soft kmeans
## Expectation Maximization setup
## Expectation Maximization
## Hessian
## dmn, k=2
## Soft kmeans
## iteration 10 change 0.002046
## Expectation Maximization setup
## Expectation Maximization
## Hessian
## dmn, k=3
## Soft kmeans
## iteration 10 change 0.029834
## Expectation Maximization setup
## Expectation Maximization
## Hessian
## dmn, k=4
## Soft kmeans
## iteration 10 change 0.001305
## iteration 20 change 0.000021
## Expectation Maximization setup
## Expectation Maximization
## iteration 10 change 0.000000
## Hessian
## dmn, k=5
## Soft kmeans
## iteration 10 change 0.058220
## iteration 20 change 0.000001
## Expectation Maximization setup
## Expectation Maximization
## Hessian
## dmn, k=6
## Soft kmeans
## iteration 10 change 0.002857
## Expectation Maximization setup
## Expectation Maximization
## Hessian
## dmn, k=7
## Soft kmeans
## iteration 10 change 0.002111
## Expectation Maximization setup
## Expectation Maximization
## Hessian
# Check model fit with different number of mixture components using standard information criteria
lplc <- base::sapply(fit, DirichletMultinomial::laplace) # AIC / BIC / Laplace
aic <- base::sapply(fit, DirichletMultinomial::AIC) # AIC / BIC / Laplace
bic <- base::sapply(fit, DirichletMultinomial::BIC) # AIC / BIC / Laplace
plot(lplc, type="b", xlab="Number of Dirichlet Components", ylab="Model Fit", ylim=c(min(c(lplc,aic,bic), na.rm = T), max(c(lplc,aic,bic), na.rm = T))); lines(aic, type="b", lty = 2); lines(bic, type="b", lty = 3)
# pick the optimal model
best <- fit[[which.min(unlist(lplc))]]
# mixture parameters pi and theta
mixturewt(best)
## pi theta
## 1 0.4444444 16.47541
## 2 0.2962963 15.38982
## 3 0.2592593 39.73563
# sample-component assignments
ass <- apply(mixture(best), 1, which.max)
temp.meta <- meta
temp.meta$cluster <- ass
knitr::kable(as.matrix(temp.meta %>% dplyr::select(Culture_setup, Inoculum, Time_point, cluster)))
| Culture_setup | Inoculum | Time_point | cluster |
|---|---|---|---|
| ocean | Helsingor | 0 | 3 |
| light+clean_gas | Helsingor | 7 | 1 |
| light+clean_gas | Helsingor | 14 | 1 |
| ocean | Helsingor | 0 | 3 |
| light+clean_gas | Helsingor | 7 | 1 |
| light+clean_gas | Helsingor | 14 | 1 |
| ocean | Helsingor | 0 | 3 |
| ocean | Hornbaek-deep | 0 | 3 |
| ocean | Hornbaek-deep | 0 | 3 |
| light+clean_gas | Hornbaek-deep | 5 | 1 |
| light+clean_gas | Hornbaek-deep | 7 | 1 |
| light+clean_gas | Hornbaek-deep | 11 | 1 |
| light+clean_gas | Hornbaek-deep | 11 | 1 |
| light+used_gas | Hornbaek-deep | 5 | 1 |
| light+used_gas | Hornbaek-deep | 7 | 1 |
| light+used_gas | Hornbaek-deep | 11 | 1 |
| light+used_gas | Hornbaek-deep | 11 | 1 |
| dark+clean_gas | Hornbaek-deep | 5 | 2 |
| dark+clean_gas | Hornbaek-deep | 7 | 2 |
| dark+clean_gas | Hornbaek-deep | 11 | 2 |
| dark+clean_gas | Hornbaek-deep | 11 | 2 |
| ocean | Hornbaek-shallow | 0 | 3 |
| ocean | Hornbaek-shallow | 0 | 3 |
| dark+clean_gas | Hornbaek-shallow | 5 | 2 |
| dark+clean_gas | Hornbaek-shallow | 7 | 2 |
| dark+clean_gas | Hornbaek-shallow | 11 | 2 |
| dark+clean_gas | Hornbaek-shallow | 11 | 2 |
# Contribution of each taxonomic group to each component
for (k in seq(ncol(fitted(best)))) {
d <- melt(fitted(best))
colnames(d) <- c("OTU", "cluster", "value")
d <- subset(d, cluster == k) %>%
# Arrange OTUs by assignment strength
arrange(value) %>%
mutate(OTU = factor(OTU, levels = unique(OTU))) %>%
# Only show the most important drivers
filter(abs(value) > quantile(abs(value), 0.8))
p <- ggplot(d, aes(x = OTU, y = value)) +
geom_bar(stat = "identity") +
coord_flip() +
labs(title = paste("Top drivers: community type", k))
print(p)
}
Different statistical methods (alpha and beta diversity, DEA) require raw read counts instead of probabilities as calculated by Emu.
#get minimap2 filtered raw counts
sample.biom <- paste0(devdir, "data/18s.minimap2.all_json_md.biom")
sample.tree <- paste0(devdir, "data/18s.minimap2.tree.nwk")
biom.m <- import_biom(sample.biom, treefilename = sample.tree)
colnames(tax_table(biom.m)) <- c("Domain", "Supergroup", "Division", "Subdivision", "Class", "Order", "Family", "Genus", "Species")
tax.m <- data.frame(tax_table(biom.m))
tax.clean.m <- data.frame(row.names = row.names(tax.m),
Domain = str_replace(tax.m[,1], "D_0__", ""),
Supergroup = str_replace(tax.m[,2], "D_1__", ""),
Division = str_replace(tax.m[,3], "D_2__", ""),
Subdivision = str_replace(tax.m[,4], "D_3__", ""),
Class = str_replace(tax.m[,5], "D_4__", ""),
Order = str_replace(tax.m[,6], "D_5__", ""),
Family = str_replace(tax.m[,7], "D_6__", ""),
Genus = str_replace(tax.m[,8], "D_7__", ""),
Species = str_replace(tax.m[,9], "D_8__", ""),
stringsAsFactors = FALSE)
tax_table(biom.m) <- as.matrix(tax.clean.m)
biom.m <- fix_duplicate_tax(biom.m)
#filter for eukaryotes
biom.filter.pr2.m <- subset_taxa(biom.m, Domain!="Eukaryota:mito")
biom.filter.pr2.m <- subset_taxa(biom.filter.pr2.m, Subdivision!="Metazoa")
biom.filter.pr2.m <- subset_taxa(biom.filter.pr2.m, Subdivision!="Ichthyosporea")
biom.filter.pr2.m <- subset_taxa(biom.filter.pr2.m, Subdivision!="Choanoflagellata")
biom.filter.pr2.m <- subset_taxa(biom.filter.pr2.m, Subdivision!="Fungi")
biom.18s.m <- subset_taxa(biom.filter.pr2.m, Domain=="Eukaryota")
biom.18s.m <- prune_samples(!(sample_names(biom.18s.m)) %in% c("flaregas-1","flaregas-2","flaregas-3"), biom.18s.m)
#preprocess phyloseq object
biom.18s.m.unfiltered <- biom.18s.m
prev0.m = apply(X = otu_table(biom.18s.m),
MARGIN = ifelse(taxa_are_rows(biom.18s.m), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
prevdf.m = data.frame(Prevalence = prev0.m, TotalAbundance = taxa_sums(biom.18s.m), tax_table(biom.18s.m))
ggplot(prevdf.m, aes(Prevalence)) + geom_bar()
prevalenceThreshold.m = 0.05 * nsamples(biom.18s.m)
biom.18s.m = prune_taxa((prev0.m > prevalenceThreshold.m), biom.18s.m)
biom.18s.m
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 5594 taxa and 27 samples ]
## sample_data() Sample Data: [ 27 samples by 8 sample variables ]
## tax_table() Taxonomy Table: [ 5594 taxa by 9 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 5594 tips and 5309 internal nodes ]
#tax_stats(biom.18s.m)
#centered log-ratio transformation
d.n0 <- cmultRepl(otu_table(biom.18s.m), method="CZM", label=0, z.warning=0.99)
## No. adjusted imputations: 115831
dim(d.n0)
## [1] 5594 27
d.clr <- apply(d.n0, 2, function(x) log(x) - mean(log(x)))
biom.18s.m.clr <- biom.18s.m
otu_table(biom.18s.m.clr) <- otu_table(d.clr, taxa_are_rows = TRUE)
#transform to relative abundance
biom.18s.m.rel <- biom.18s.m %>% transform_sample_counts(function(x) {x / sum(x)})
#genus
#get genus counts
biom.18s.m.genus <- biom.18s.m %>% tax_glom(taxrank = "Genus")
#filter genera that exist also running Emu on minimap2 counts
biom.18s.genus.minimap2emu <- tax_table(biom.18s.m.genus) %>% as.data.frame() %>% dplyr::select(Genus) %>% rownames_to_column(var = "SpeciesID") %>% inner_join(tax_table(biom.18s.genus) %>% as.data.frame() %>% dplyr::select(Genus), by = "Genus")
This should not be calculated with low abundancy removed counts (so here we should use minimap2 abundancies).
Sample richness in the different culture setups and different time points (emu)
#https://docs.onecodex.com/en/articles/4136553-alpha-diversity
alpha_meas = c("Observed", "Chao1", "Shannon", "Simpson")
p <- plot_richness(biom.18s.int, "Culture_setup", "Time_point", measures=alpha_meas)
pdf(paste0(wdir, "fig/alpha-diversity-cultures-18s-emu.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Culture_setup, y=value, color=NULL), alpha=0.1)
dev.off()
#boxplot_alpha(biom.18s.int, index = "chao1", x_var = "Culture_setup") + theme_minimal() + theme(legend.position = "none")
#https://microbiome.github.io/tutorials/Alphadiversity.html
tab <-microbiome::alpha(biom.18s.int, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
knitr::kable(as.matrix(tab))
#show the dominant taxonomic group for each sample
temp <- sample_data(biom.18s.int)
temp$dominant <- tax_table(biom.18s.int)[dominant(biom.18s.int),"Species"]
knitr::kable(as.matrix(temp[,c("Culture_setup","dominant")]))
p <- plot_richness(biom.18s.int, "Time_point", "Culture_setup", measures=alpha_meas)
p$data$Time_point <- factor(p$data$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/alpha-diversity-times-18s-emu.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Time_point, y=value, color=NULL), alpha=0.1)
dev.off()
#boxplot_alpha(biom.18s.int, index = "chao1", x_var = "Time_point") + theme_minimal() + theme(legend.position = "none")
Sample richness in the different culture setups and different time points (minimap2)
alpha_meas = c("Observed", "Chao1", "Shannon", "Simpson")
p <- plot_richness(biom.18s.m.unfiltered, "Culture_setup", "Time_point", measures=alpha_meas)
pdf(paste0(wdir, "fig/alpha-diversity-cultures-18s-minimap2.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Culture_setup, y=value, color=NULL), alpha=0.1)
dev.off()
#boxplot_alpha(biom.18s.m.unfiltered, index = "chao1", x_var = "Culture_setup") + theme_minimal() + theme(legend.position = "none")
#https://microbiome.github.io/tutorials/Alphadiversity.html
tab <-microbiome::alpha(biom.18s.m.unfiltered, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
knitr::kable(as.matrix(tab))
#show the dominant taxonomic group for each sample
temp <- sample_data(biom.18s.m.unfiltered)
temp$dominant <- tax_table(biom.18s.m.unfiltered)[dominant(biom.18s.m.unfiltered),"Species"]
knitr::kable(as.matrix(temp[,c("Culture_setup","dominant")]))
p <- plot_richness(biom.18s.m.unfiltered, "Time_point", "Culture_setup", measures=alpha_meas)
p$data$Time_point <- factor(p$data$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/alpha-diversity-times-18s-minimap2.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Time_point, y=value, color=NULL), alpha=0.1)
dev.off()
#boxplot_alpha(biom.18s.m.unfiltered, index = "chao1", x_var = "Time_point") + theme_minimal() + theme(legend.position = "none")
This should not be calculated with low abundancy removed counts (so here we should use minimap2 abundancies).
PCA (emu)
# apply a singular value decomposition to the dataset
# do not use princomp function in R!!
pcx <- prcomp(t(otu_table(biom.18s.clr)), scale. = T)
# generate a scree plot
#http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/
fviz_eig(pcx, addlabels = TRUE)
# generate biplot (check https://colorbrewer2.org/ for selecting colors)
pdf(paste0(wdir,"fig/clr-pca-18s-emu.pdf"), width = 30, height = 8)
p1 <- ggbiplot(pcx, groups = meta$Culture_setup, var.axes = F, choices = c(1,2), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_brewer(type = "div", palette = 1) + labs(color = "Group")
p2 <- ggbiplot(pcx, groups = meta$Culture_setup, var.axes = F, choices = c(3,4), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_brewer(type = "div", palette = 1) + labs(color = "Group")
plot(ggarrange(p1, p2, ncol = 2, nrow = 1))
dev.off()
PCA (minimap2)
#biplot
pcx <- prcomp(t(otu_table(biom.18s.m.clr)), scale. = T)
fviz_eig(pcx, addlabels = TRUE)
# generate biplot (check https://colorbrewer2.org/ for selecting colors)
p1 <- ggbiplot(pcx, groups = meta$Culture_setup, var.axes = F, choices = c(1,2), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_brewer(type = "div", palette = 1) + labs(color = "Group")
p2 <- ggbiplot(pcx, groups = meta$Time_point, var.axes = F, choices = c(1,2), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_manual(values=c('#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00')) + labs(color = "Group")
pdf(paste0(wdir,"fig/clr-pca-18s-minimap2.pdf"), width = 30, height = 8)
plot(ggarrange(p1, p2, ncol = 2, nrow = 1))
## Too few points to calculate an ellipse
dev.off()
Unifrac (emu)
# average-link clustering
# Use unifrac on original compositional data: If your data is truly compositional (relative abundances sum to 1), consider using unifrac directly on the untransformed data
biom.hclust <- hclust(phyloseq::distance(biom.18s, method = "unifrac"), method="average")
biom.hclust$labels <- sample_data(biom.18s)$Label
plot(biom.hclust)
#biom.hclust <- hclust(phyloseq::distance(biom.18s, "wunifrac"), method="average")
#biom.hclust$labels <- sample_data(biom.18s)$Label
#plot(biom.hclust)
#biom.hclust <- hclust(phyloseq::distance(biom.18s, "bray"), method="average")
#biom.hclust$labels <- sample_data(biom.18s)$Label
#plot(biom.hclust)
Unifrac (minimap2)
# average-link clustering
# Use unifrac on original compositional data: If your data is truly compositional (relative abundances sum to 1), consider using unifrac directly on the untransformed data
biom.hclust <- hclust(phyloseq::distance(biom.18s.m, method = "unifrac"), method="average")
biom.hclust$labels <- sample_data(biom.18s)$Label
plot(biom.hclust)
PERMANOVA
This should not be calculated with low abundancy removed counts (so here we should use minimap2 abundancies).
#https://microbiome.github.io/tutorials/PERMANOVA.html
plot_landscape(biom.18s.m.rel, method = "NMDS", distance = "bray", col = "Culture_setup", size = 3)
plot_landscape(biom.18s.m.rel, method = "NMDS", distance = "bray", col = "Time_point", size = 3)
##https://deneflab.github.io/MicrobeMiseq/demos/mothur_2_phyloseq.html
set.seed(1)
# Calculate bray curtis distance matrix
biom.18s.dist <- phyloseq::distance(biom.18s.m, method = "bray")
#biom.18s.dist <- phyloseq::distance(biom.18s.m, method = "unifrac")
#biom.18s.dist <- phyloseq::distance(biom.18s.m, method = "wunifrac")
# make a data frame from the sample_data
#sampledf <- data.frame(sample_data(biom.18s.m))
sampledf <- meta(biom.18s.m)
#test the hypothesis that the different culture setups have different centroids
# Adonis test (H0 .. different cultures have the same centroid)
perm <- adonis2(biom.18s.dist ~ Culture_setup, data = sampledf)
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = biom.18s.dist ~ Culture_setup, data = sampledf)
## Df SumOfSqs R2 F Pr(>F)
## Culture_setup 3 3.4497 0.44175 6.0667 0.001 ***
## Residual 23 4.3595 0.55825
## Total 26 7.8093 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogeneity of dispersion test (H0 .. different cultures have the same dispersion)
beta <- betadisper(biom.18s.dist, sampledf$Culture_setup)
permutest(beta) #alternatively use anova(beta)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.17440 0.058133 2.1949 999 0.101
## Residuals 23 0.60917 0.026486
#test the hypothesis that the different time points have different centroids
# Adonis test (H0 .. different different time points have the same centroid)
perm <- adonis2(biom.18s.dist ~ Time_point, data = sampledf)
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = biom.18s.dist ~ Time_point, data = sampledf)
## Df SumOfSqs R2 F Pr(>F)
## Time_point 4 3.1961 0.40927 3.8105 0.001 ***
## Residual 22 4.6132 0.59073
## Total 26 7.8093 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogeneity of dispersion test (H0 .. different time points have the same dispersion)
beta <- betadisper(biom.18s.dist, sampledf$Time_point)
permutest(beta)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 4 0.18157 0.045392 1.1704 999 0.346
## Residuals 22 0.85325 0.038784
ANOVA-like differential expression of genera (minimap2)
set.seed(12345)
# test differential expression: (light.clean_gas+light.used_gas) - dark.clean_gas
meta.sub.1 <- meta %>% dplyr::filter(Culture_setup == "light+clean_gas" | Culture_setup == "light+used_gas" | Culture_setup == "dark+clean_gas") %>% mutate(Culture_setup = replace(Culture_setup, Culture_setup=="light+used_gas", "light+clean_gas"))
mm <- model.matrix(~ Inoculum + Culture_setup, meta.sub.1)
counts.sub <- otu_table(biom.18s.m.genus)[,meta.sub.1$Sample]
x <- aldex.clr(counts.sub, mm, mc.samples=100, verbose=T, gamma=1e-3)
## integer matrix provided
## using all features for denominator
## operating in serial mode
## removed rows with sums equal to zero
## data format is OK
## dirichlet samples complete
## aldex.scaleSim: adjusting samples to reflect scale uncertainty.
## sampling from the default scale model.
glm.test <- aldex.glm(x, mm, fdr.method='BH')
## running tests for each MC instance:
## |------------(25%)----------(50%)----------(75%)----------|
glm.eff<- aldex.glm.effect(x)
## operating in serial mode
## sanity check complete
## rab.all complete
## rab.win complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all complete
## rab.win complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all complete
## rab.win complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
aldex.glm.plot(glm.test, eff=glm.eff, contrast='Culture_setuplight+clean_gas', type='MW', test='effect', cutoff.effect = 1)
aldex.glm.plot(glm.test, eff=glm.eff, contrast='Culture_setuplight+clean_gas', type='MA', test='effect', cutoff.effect = 1)
#aldex.glm.plot(glm.test, eff=glm.eff, contrast='Culture_setuplight+clean_gas', type='volcano', test='effect', cutoff.effect = 1)
#filter genera that where detected with Emu (filter low abundant genera)
glm.eff.lightVSdark <- glm.eff$`Culture_setuplight+clean_gas`[biom.18s.genus.minimap2emu$SpeciesID,]
glm.eff.lightVSdark <- glm.eff.lightVSdark %>% dplyr::filter(rab.all!="NA")
# higher relative abundance under light
glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1.5,] %>% nrow()
## [1] 3
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1.5,] %>% row.names(),]))
| Domain | Supergroup | Division | Subdivision | Class | Order | Family | Genus | Species | |
|---|---|---|---|---|---|---|---|---|---|
| DQ079859.1.1794_U | Eukaryota | Haptista | Haptophyta | Haptophyta_X | Prymnesiophyceae | Isochrysidales | Isochrysidaceae | Tisochrysis | NA |
| KY054995.1.1744_U | Eukaryota | Archaeplastida | Chlorophyta | Chlorophyta_X | Chlorodendrophyceae | Chlorodendrales | Chlorodendraceae | Tetraselmis | NA |
| EF527133.16.1840_U | Eukaryota | Archaeplastida | Chlorophyta | Chlorophyta_X | Chlorodendrophyceae | Chlorodendrales | Chlorodendraceae | Chlorodendrales_XX | NA |
# higher relative abundance in the dark
glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1.5,] %>% nrow()
## [1] 11
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1.5,] %>% row.names(),]))
| Domain | Supergroup | Division | Subdivision | Class | Order | Family | Genus | Species | |
|---|---|---|---|---|---|---|---|---|---|
| MN945085.1.1704_U | Eukaryota | TSAR | Stramenopiles | Gyrista | Chrysophyceae | Ochromonadales | Ochromonadaceae | Pedospumella | NA |
| EF633325.1.1748_U | Eukaryota | TSAR | Stramenopiles | Gyrista | Chrysophyceae | Ochromonadales | Ochromonadales_X | Ochromonadales_XX | NA |
| JQ782092.1.1744_U | Eukaryota | TSAR | Stramenopiles | Gyrista | Chrysophyceae | Ochromonadales | Ochromonadaceae | Spumella | NA |
| AY620366.1.1247_U | Eukaryota | TSAR | Rhizaria | Cercozoa | Filosa-Granofilosea | Cryptofilida | Novel-Gran-2 | Novel-Gran-2_X | NA |
| DQ388459.1.1722_U | Eukaryota | TSAR | Alveolata | Dinoflagellata | Dinophyceae | Prorocentrales | Prorocentraceae | Prorocentrum | NA |
| EF024169.1.1808_U | Eukaryota | TSAR | Rhizaria | Cercozoa | Filosa-Granofilosea | Filosa-Granofilosea_X | Novel-Gran-6 | Novel-Gran-6_X | NA |
| KU363283.1.1461_U | Eukaryota | TSAR | Alveolata | Ciliophora | Oligohymenophorea | Peritrichia_2 | Sessilida | Sessilida_X | NA |
| DQ504338.1.1561_U | Eukaryota | TSAR | Alveolata | Ciliophora | Spirotrichea | Euplotia | Aspidiscidae | Aspidisca | NA |
| AF271998.1.1795_U | Eukaryota | Obazoa | Opisthokonta | Filasterea | Filasterea_X | Ministerida | Ministeridae | Ministeria | NA |
| MF197366.1.2097_U | Eukaryota | Amoebozoa | Discosea | Discosea_X | Flabellinia | Dactylopodida | Paramoebidae | Paramoeba | NA |
| AY277797.1.1861_U | Eukaryota | Amoebozoa | Tubulinea | Tubulinea_X | Elardia | Leptomyxida | Flabellulidae | Paraflabellula | NA |
temp.1 <- glm.eff.lightVSdark[abs(glm.eff.lightVSdark$effect) >= 1.5,] %>% rownames_to_column(var = "SpeciesID")
temp.2 <- tax_table(biom.18s.m.genus) %>% as.data.frame() %>% rownames_to_column(var = "SpeciesID")
res_sig.aldex <- inner_join(temp.1, temp.2, by = "SpeciesID")
pdf(paste0(wdir, "fig/aldex-light_clean_usedVSdark-scatter-genus-18s.pdf"), width = 35, height = 13)
ggplot(res_sig.aldex, aes(x=Genus, y=diff.btw, color=Subdivision)) +
geom_jitter(size=3, width = 0.2) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) + ylab("logFC")
dev.off()
## png
## 2
for(i in glm.eff.lightVSdark[abs(glm.eff.lightVSdark$effect) >= 1,] %>% row.names()) {
de.genus <- tax_table(biom.18s.m.genus)[i,"Genus"]
p <- boxplot_abundance(biom.18s.m.genus, x = "Culture_setup", y = i) + scale_y_log10() + ggtitle(de.genus)
print(p)
}
set.seed(12345)
# test differential expression: ((light.clean_gas+light.used_gas) - ocean) AND (dark.clean_gas - ocean)
meta.sub.1 <- meta %>% mutate(Culture_setup = replace(Culture_setup, Culture_setup=="light+used_gas", "light+clean_gas"))
meta.sub.1$Culture_setup <- factor(meta.sub.1$Culture_setup, levels = c("ocean", "dark+clean_gas", "light+clean_gas"))
mm <- model.matrix(~0 + Inoculum + Culture_setup, meta.sub.1)
counts.sub <- otu_table(biom.18s.m.genus)[,meta.sub.1$Sample]
x <- aldex.clr(counts.sub, mm, mc.samples=100, verbose=T, gamma=1e-3)
## integer matrix provided
## using all features for denominator
## operating in serial mode
## removed rows with sums equal to zero
## data format is OK
## dirichlet samples complete
## aldex.scaleSim: adjusting samples to reflect scale uncertainty.
## sampling from the default scale model.
glm.test.1 <- aldex.glm(x, mm, fdr.method='BH')
## running tests for each MC instance:
## |------------(25%)----------(50%)----------(75%)----------|
glm.eff.1<- aldex.glm.effect(x)
## operating in serial mode
## sanity check complete
## rab.all complete
## rab.win complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all complete
## rab.win complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all complete
## rab.win complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all complete
## rab.win complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setuplight+clean_gas', type='MW', test='effect', cutoff.effect = 1.5)
aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setuplight+clean_gas', type='MA', test='effect', cutoff.effect = 1.5)
#aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setuplight+clean_gas', type='volcano', test='fdr', cutoff.pval = 0.1)
aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setupdark+clean_gas', type='MW', test='effect', cutoff.effect = 1.5)
aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setupdark+clean_gas', type='MA', test='effect', cutoff.effect = 1.5)
#aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setupdark+clean_gas', type='volcano', test='fdr', cutoff.pval = 0.1)
#filter genera that where detected with Emu (filter low abundant genera)
glm.eff.lightVSocean <- glm.eff.1$`Culture_setuplight+clean_gas`[biom.18s.genus.minimap2emu$SpeciesID,]
glm.eff.darkVSocean <- glm.eff.1$`Culture_setupdark+clean_gas`[biom.18s.genus.minimap2emu$SpeciesID,]
# higher relative abundance in light-incubated cultures than in the ocean
glm.eff.lightVSocean[glm.eff.lightVSocean$effect >= 1.5,] %>% nrow()
## [1] 8
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[glm.eff.lightVSocean[glm.eff.lightVSocean$effect >= 1.5,] %>% row.names(),]))
| Domain | Supergroup | Division | Subdivision | Class | Order | Family | Genus | Species | |
|---|---|---|---|---|---|---|---|---|---|
| DQ079859.1.1794_U | Eukaryota | Haptista | Haptophyta | Haptophyta_X | Prymnesiophyceae | Isochrysidales | Isochrysidaceae | Tisochrysis | NA |
| KC875350.1.1077_U | Eukaryota | Archaeplastida | Chlorophyta | Chlorophyta_X | Chlorophyceae | Chlamydomonadales | Chlamydomonadales_X | Dunaliella | NA |
| JF790981.1.1777_U | Eukaryota | Archaeplastida | Chlorophyta | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Chlorellales_XX | NA |
| AB058309.1.1749_U | Eukaryota | Archaeplastida | Chlorophyta | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Picochlorum | NA |
| AB080302.1.1788_U | Eukaryota | Archaeplastida | Chlorophyta | Chlorophyta_X | Trebouxiophyceae | Chlorellales | Chlorellales_X | Nannochloris | NA |
| KY054986.1.1708_U | Eukaryota | TSAR | Stramenopiles | Gyrista | Eustigmatophyceae | Eustigmatophyceae_X | Eustigmatophyceae_XX | Nannochloropsis | NA |
| KY054995.1.1744_U | Eukaryota | Archaeplastida | Chlorophyta | Chlorophyta_X | Chlorodendrophyceae | Chlorodendrales | Chlorodendraceae | Tetraselmis | NA |
| EF527133.16.1840_U | Eukaryota | Archaeplastida | Chlorophyta | Chlorophyta_X | Chlorodendrophyceae | Chlorodendrales | Chlorodendraceae | Chlorodendrales_XX | NA |
# higher relative abundance in dark-incubated cultures than in the ocean
glm.eff.darkVSocean[glm.eff.darkVSocean$effect >= 1.5,] %>% nrow()
## [1] 7
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[glm.eff.darkVSocean[glm.eff.darkVSocean$effect >= 1.5,] %>% row.names(),]))
| Domain | Supergroup | Division | Subdivision | Class | Order | Family | Genus | Species | |
|---|---|---|---|---|---|---|---|---|---|
| MN945085.1.1704_U | Eukaryota | TSAR | Stramenopiles | Gyrista | Chrysophyceae | Ochromonadales | Ochromonadaceae | Pedospumella | NA |
| EF633325.1.1748_U | Eukaryota | TSAR | Stramenopiles | Gyrista | Chrysophyceae | Ochromonadales | Ochromonadales_X | Ochromonadales_XX | NA |
| JQ782092.1.1744_U | Eukaryota | TSAR | Stramenopiles | Gyrista | Chrysophyceae | Ochromonadales | Ochromonadaceae | Spumella | NA |
| AY620366.1.1247_U | Eukaryota | TSAR | Rhizaria | Cercozoa | Filosa-Granofilosea | Cryptofilida | Novel-Gran-2 | Novel-Gran-2_X | NA |
| EF024169.1.1808_U | Eukaryota | TSAR | Rhizaria | Cercozoa | Filosa-Granofilosea | Filosa-Granofilosea_X | Novel-Gran-6 | Novel-Gran-6_X | NA |
| DQ504338.1.1561_U | Eukaryota | TSAR | Alveolata | Ciliophora | Spirotrichea | Euplotia | Aspidiscidae | Aspidisca | NA |
| MF197366.1.2097_U | Eukaryota | Amoebozoa | Discosea | Discosea_X | Flabellinia | Dactylopodida | Paramoebidae | Paramoeba | NA |
temp.1 <- glm.eff.lightVSocean[glm.eff.lightVSocean$effect >= 1.5,] %>% rownames_to_column(var = "SpeciesID")
temp.2 <- tax_table(biom.18s.m.genus) %>% as.data.frame() %>% rownames_to_column(var = "SpeciesID")
res_sig.aldex.lightVSocean <- inner_join(temp.1, temp.2, by = "SpeciesID")
pdf(paste0(wdir, "fig/aldex-lightVSocean-scatter-genus-18s.pdf"), width = 35, height = 13)
ggplot(res_sig.aldex.lightVSocean, aes(x=reorder(Genus, diff.btw), y=diff.btw, color=Subdivision)) +
geom_point(size=3, width = 0.2) + coord_flip() +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) + labs(y = "logFC", x = "Genus")
dev.off()
## png
## 2
temp.1 <- glm.eff.darkVSocean[glm.eff.darkVSocean$effect >= 1.5, ] %>% rownames_to_column(var = "SpeciesID")
temp.2 <- tax_table(biom.18s.m.genus) %>% as.data.frame() %>% rownames_to_column(var = "SpeciesID")
res_sig.aldex.darkVSocean <- inner_join(temp.1, temp.2, by = "SpeciesID")
pdf(paste0(wdir, "fig/aldex-darkVSocean-scatter-genus-18s.pdf"), width = 35, height = 13)
ggplot(res_sig.aldex.darkVSocean, aes(x=reorder(Genus, diff.btw), y=diff.btw, color=Subdivision)) +
geom_point(size=3, width = 0.2) + coord_flip() +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) + labs(y = "logFC", x = "Genus")
dev.off()
## png
## 2
Differential abundance testing of genera with DESeq2 (minimap2)
As DESeq2 expects raw counts we will use here the minimap2 counts instead of emu counts.
#run DESeq2
#https://micca.readthedocs.io/en/latest/phyloseq.html
sample_data(biom.18s.m.genus)$Culture_setup <- as.factor(sample_data(biom.18s.m.genus)$Culture_setup)
ds <- phyloseq_to_deseq2(biom.18s.m.genus, ~ Inoculum + Culture_setup)
## converting counts to integer mode
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
ds <- DESeq(ds)
## estimating size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## fitting model and testing
alpha <- 0.001
# test differential expression: light.clean_gas - dark.clean_gas
res.1 <- results(ds, contrast=c("Culture_setup", "light+clean_gas", "dark+clean_gas"), alpha=alpha)
summary(res.1)
##
## out of 969 with nonzero total read count
## adjusted p-value < 0.001
## LFC > 0 (up) : 9, 0.93%
## LFC < 0 (down) : 22, 2.3%
## outliers [1] : 15, 1.5%
## low counts [2] : 714, 74%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res.1 <- res.1[order(res.1$padj, na.last=NA), ]
res_sig <- res.1[(res.1$padj < alpha), ]
res_sig
## log2 fold change (MLE): Culture_setup light+clean_gas vs dark+clean_gas
## Wald test p-value: Culture setup light.clean gas vs dark.clean gas
## DataFrame with 31 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## DQ504338.1.1561_U 8428.8226 -11.95311 1.149804 -10.39578 2.59162e-25
## KY320339.1.1613_U 4.4960 16.33856 2.048889 7.97435 1.53184e-15
## EF024169.1.1808_U 64.7769 -7.94853 1.005054 -7.90856 2.60389e-15
## AY620366.1.1247_U 1421.6719 -9.35420 1.209885 -7.73148 1.06307e-14
## AY277797.1.1861_U 12.3586 -6.67033 0.961394 -6.93819 3.97169e-12
## ... ... ... ... ... ...
## KU891599.1.1820_U 1882.6365 -4.27709 1.055190 -4.05339 5.04812e-05
## EF486866.1.1676_U 39.8434 -8.47230 2.093419 -4.04711 5.18534e-05
## EU567232.1.1098_U 11.8458 -5.71658 1.458588 -3.91926 8.88229e-05
## GQ465466.1.1758_U 215.6732 3.63700 0.947935 3.83676 1.24669e-04
## MF197366.1.2097_U 27.2535 -4.81145 1.252632 -3.84108 1.22496e-04
## padj
## <numeric>
## DQ504338.1.1561_U 6.21990e-23
## KY320339.1.1613_U 1.83821e-13
## EF024169.1.1808_U 2.08311e-13
## AY620366.1.1247_U 6.37844e-13
## AY277797.1.1861_U 1.90641e-10
## ... ...
## KU891599.1.1820_U 0.000444458
## EF486866.1.1676_U 0.000444458
## EU567232.1.1098_U 0.000735086
## GQ465466.1.1758_U 0.000965181
## MF197366.1.2097_U 0.000965181
DESeq2::plotMA(res.1, alpha = alpha, ylim = c(-9,9))
res.1 %>% as.data.frame %>% filter(padj<alpha) %>% summarize(up = sum(log2FoldChange>1), down = sum(log2FoldChange<1))
## up down
## 1 9 22
# up down
#1 9 22
res_sig <- cbind(as(res_sig, "data.frame"), as(tax_table(biom.18s.m)[rownames(res_sig), ], "matrix"))
pdf(paste0(wdir, "fig/deseq2-light_cleanVSdark_clean-scatter-genus-18s.pdf"), width = 15, height = 13)
ggplot(res_sig, aes(x=Genus, y=log2FoldChange, color=Subdivision)) +
geom_jitter(size=3, width = 0.2) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png
## 2
#View(cbind(as(res_sig, "data.frame"), as(otu_table(biom.18s.m)[rownames(res_sig), ], "matrix")))
#volcano plot
volDat <- as.data.frame(res.1)
volDat <- volDat[!is.na(volDat$padj), ]
volDat$DGE <- volDat$padj < alpha
pdf(paste0(wdir, "fig/deseq2-light_cleanVSdark_clean-volcano-genus-18s.pdf"), width = 15, height = 13)
ggplot(volDat, aes(x = log2FoldChange, y = -log10(padj), color = DGE)) +
geom_point(alpha = 0.50) +
scale_color_manual(values=c("TRUE" = "red", "FALSE" = "gray"))
dev.off
## function (which = dev.cur())
## {
## if (which == 1)
## stop("cannot shut down device 1 (the null device)")
## .External(C_devoff, as.integer(which))
## dev.cur()
## }
## <bytecode: 0x56483d42b208>
## <environment: namespace:grDevices>
#run DESeq2
# test differential expression: light.clean_gas - light.used_gas
res.2 <- results(ds, contrast=c("Culture_setup", "light+clean_gas", "light+used_gas"), alpha=alpha)
summary(res.2)
##
## out of 969 with nonzero total read count
## adjusted p-value < 0.001
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 15, 1.5%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res.2 <- res.2[order(res.2$padj, na.last=NA), ]
DESeq2::plotMA(res.2, alpha = alpha, ylim = c(-9,9))
res.2 %>% as.data.frame %>% filter(padj<alpha) %>% summarize(up = sum(log2FoldChange>1), down = sum(log2FoldChange<1))
## up down
## 1 0 0
# up down
#1 0 0
Differential abundance testing of genera with edgeR (minimap2)
As edgeR expects raw counts we will use here the minimap2 counts instead of emu counts.
er <- phyloseq2edgeR(biom.18s.m.genus)
#normalize
er <- edgeR::calcNormFactors(er, method = "TMM")
#metadata
meta.sub <- as.data.frame(sample_data(biom.18s.m.genus)[,c(2,3,8)])
meta.sub <- meta.sub %>% as_tibble() %>% mutate(across(.cols = 3, .fns = as.integer)) %>% mutate(across(.cols = 1:2, .fns = make.names))
#estimate dispersion
design <- model.matrix(~0 + Culture_setup + Inoculum, data=meta.sub)
er <- edgeR::estimateDisp(er, design)
plotBCV(er)
#differential expression
fit <- glmQLFit(er, design, robust = TRUE)
plotQLDisp(fit)
# test differential expression: light.clean_gas - dark.clean_gas
qlf <- glmQLFTest(fit, contrast = c(-1,1,0,0,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
## -1*Culture_setupdark.clean_gas 1*Culture_setuplight.clean_gas
## Down 16
## NotSig 952
## Up 1
# -1*Culture_setupdark.clean_gas 1*Culture_setuplight.clean_gas
#Down 19
#NotSig 948
#Up 2
dge.1 <- topTags(qlf, n = Inf, p.value = 0.001)$table
dge.1 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 1
knitr::kable(as.matrix(dge.1 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logCPM, logFC, FDR)))
| Genus | logCPM | logFC | FDR | |
|---|---|---|---|---|
| GQ468539.1.1784_U | Cylindrotheca | 8.747015 | 4.290248 | 0.0007334532 |
dge.1 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 16
knitr::kable(as.matrix(dge.1 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logCPM, logFC, FDR)))
| Genus | logCPM | logFC | FDR | |
|---|---|---|---|---|
| EF024169.1.1808_U | Novel-Gran-6_X | 9.357027 | -8.377701 | 2.830945e-06 |
| DQ504338.1.1561_U | Aspidisca | 16.386715 | -11.917630 | 4.822431e-06 |
| FJ870103.1.1752_U | Cinetochilum | 9.236447 | -9.528387 | 1.811784e-05 |
| AY277797.1.1861_U | Paraflabellula | 7.146863 | -6.379301 | 1.811784e-05 |
| AY620366.1.1247_U | Novel-Gran-2_X | 13.754814 | -9.932116 | 1.870539e-05 |
| HQ121441.1.1726_U | Micrometopion | 8.943413 | -10.443996 | 2.157481e-05 |
| AY749517.1.1757_U | Pterocystida_XXX | 7.995959 | -6.593519 | 3.345444e-05 |
| EF024494.1.1800_U | Cercomonas | 6.957978 | -7.509189 | 3.679240e-05 |
| EF633325.1.1748_U | Ochromonadales_XX | 14.868801 | -9.999643 | 7.078945e-05 |
| JQ782092.1.1744_U | Spumella | 13.158507 | -9.682907 | 8.574012e-05 |
| EU567232.1.1098_U | Novel-Gran-1_X | 7.152513 | -9.238268 | 8.574012e-05 |
| KM065452.1.1878_U | Massisteria | 6.896744 | -5.069302 | 8.574012e-05 |
| KU363283.1.1461_U | Sessilida_X | 9.259330 | -6.203220 | 8.829018e-05 |
| DQ388459.1.1722_U | Prorocentrum | 8.448156 | -4.312935 | 2.892929e-04 |
| AJ549210.1.1823_U | Euplotes | 12.152814 | -4.144860 | 3.000959e-04 |
| AF271998.1.1795_U | Ministeria | 9.258585 | -8.557191 | 8.881408e-04 |
res_sig <- dge.1 %>% filter(FDR<0.001)
pdf(paste0(wdir, "fig/edger-light_cleanVSdark_clean-scatter-genus-18s.pdf"), width = 15, height = 13)
ggplot(res_sig, aes(x=Genus, y=logFC, color=Subdivision)) +
geom_jitter(size=3, width = 0.2) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png
## 2
# test differential expression: (light.clean_gas + light.used_gas) - dark.clean_gas
qlf <- glmQLFTest(fit, contrast = c(-1,1/2,1/2,0,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
## -1*Culture_setupdark.clean_gas 0.5*Culture_setuplight.clean_gas 0.5*Culture_setuplight.used_gas
## Down 22
## NotSig 946
## Up 1
# -1*Culture_setupdark.clean_gas 0.5*Culture_setuplight.clean_gas 0.5*Culture_setuplight.used_gas
#Down 24
#NotSig 943
#Up 2
dge.2 <- topTags(qlf, n = Inf, p.value = 0.001)$table
dge.2 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 1
knitr::kable(as.matrix(dge.2 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logFC, FDR)))
| Genus | logFC | FDR | |
|---|---|---|---|
| GQ468539.1.1784_U | Cylindrotheca | 4.121972 | 0.0006522498 |
dge.2 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 22
knitr::kable(as.matrix(dge.2 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logFC, FDR)))
| Genus | logFC | FDR | |
|---|---|---|---|
| EF024169.1.1808_U | Novel-Gran-6_X | -8.459597 | 2.784921e-07 |
| DQ504338.1.1561_U | Aspidisca | -10.582575 | 2.784921e-07 |
| AY749517.1.1757_U | Pterocystida_XXX | -8.163704 | 2.784921e-07 |
| AY620366.1.1247_U | Novel-Gran-2_X | -9.757920 | 2.784921e-07 |
| EF633325.1.1748_U | Ochromonadales_XX | -10.972292 | 3.478802e-07 |
| JQ782092.1.1744_U | Spumella | -9.926400 | 3.648485e-07 |
| FJ870103.1.1752_U | Cinetochilum | -9.244398 | 5.762508e-07 |
| AY277797.1.1861_U | Paraflabellula | -5.911446 | 9.666456e-07 |
| KU363283.1.1461_U | Sessilida_X | -7.207558 | 2.516731e-06 |
| EF024494.1.1800_U | Cercomonas | -7.084886 | 2.696626e-06 |
| HQ121441.1.1726_U | Micrometopion | -8.561980 | 5.495526e-06 |
| KM065452.1.1878_U | Massisteria | -4.682554 | 2.083382e-05 |
| EU567232.1.1098_U | Novel-Gran-1_X | -7.332314 | 1.052815e-04 |
| AF271998.1.1795_U | Ministeria | -7.994616 | 1.052815e-04 |
| KU891599.1.1820_U | Paraphysomonas | -4.974620 | 1.052815e-04 |
| DQ388459.1.1722_U | Prorocentrum | -3.873278 | 1.052815e-04 |
| MN945085.1.1704_U | Pedospumella | -11.108575 | 1.395728e-04 |
| MN317566.1.2078_U | Cunea | -6.252953 | 1.575477e-04 |
| JQ967291.1.1656_U | Lepidochromonas | -7.108951 | 2.894479e-04 |
| AJ549210.1.1823_U | Euplotes | -3.564333 | 3.357458e-04 |
| FJ832156.1.1753_U | Lecudina | -4.570884 | 7.868416e-04 |
| MF197366.1.2097_U | Paramoeba | -4.968286 | 9.093298e-04 |
pdf(paste0(wdir, "fig/edger-light_clean_usedVSdark_clean-scatter-genus-18s.pdf"), width = 15, height = 13)
ggplot(dge.2, aes(x=Genus, y=logFC, color=Subdivision)) +
geom_jitter(size=3, width = 0.2) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png
## 2
# test differential expression: light.clean_gas - light.used_gas
qlf <- glmQLFTest(fit, contrast = c(0,1,-1,0,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.01)) #FDR<0.01
## 1*Culture_setuplight.clean_gas -1*Culture_setuplight.used_gas
## Down 0
## NotSig 969
## Up 0
# 1*Culture_setuplight.clean_gas -1*Culture_setuplight.used_gas
#Down 0
#NotSig 969
#Up 0
# test differential expression: ocean - (light.clean_gas + light.used_gas + dark.clean_gas)/3
qlf <- glmQLFTest(fit, contrast = c(-1/3,-1/3,-1/3,1,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
## -0.333333333333333*Culture_setupdark.clean_gas -0.333333333333333*Culture_setuplight.clean_gas -0.333333333333333*Culture_setuplight.used_gas 1*Culture_setupocean
## Down 19
## NotSig 851
## Up 99
# -0.333333333333333*Culture_setupdark.clean_gas -0.333333333333333*Culture_setuplight.clean_gas -0.333333333333333*Culture_setuplight.used_gas 1*Culture_setupocean
#Down 28
#NotSig 802
#Up 139
dge.3 <- topTags(qlf, n = Inf, p.value = 0.001)$table
dge.3 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 99
knitr::kable(as.matrix(dge.3 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logFC, FDR)))
| Genus | logFC | FDR | |
|---|---|---|---|
| FN669510.1.1800_U | Gyrodinium | 3.212317 | 1.006479e-07 |
| FN263034.1.1797_U | Novel-clade-2_X | 2.953095 | 1.006479e-07 |
| FR874775.1.1777_U | Micromonas | 2.864596 | 1.792524e-07 |
| KJ763236.1.1766_U | Ostreococcus | 3.485785 | 2.384636e-07 |
| AB252769.1.1676_U | MAST-12A | 2.854537 | 3.109848e-07 |
| FR874799.1.1794_U | Dino-Group-II-Clade-32_X | 3.336879 | 4.054296e-07 |
| FJ480419.1.1774_U | Strombidium | 2.955235 | 4.054296e-07 |
| AY381206.1.1722_U | Peronosporomycetes_XXX | 2.929331 | 4.054296e-07 |
| FN690411.1.1710_U | Protaspa-lineage_X | 2.809405 | 5.124073e-07 |
| FJ884690.1.1768_U | Teleaulax | 2.838814 | 1.001635e-06 |
| LC068842.1.1804_U | Biecheleria | 2.681285 | 1.001635e-06 |
| KX783613.1.1556_U | Mesodinium | 2.956847 | 1.258742e-06 |
| JX828170.1.1778_U | Leptosiphonia | 2.930587 | 1.301870e-06 |
| AY180040.1.1628_U | Chlamydodon_1 | 2.929003 | 1.453780e-06 |
| EF527106.1.1777_U | Strombidiidae_X | 2.744290 | 1.453780e-06 |
| DQ314811.1.1777_U | Cryothecomonas | 2.733860 | 1.453780e-06 |
| AY665065.1.1754_U | Dinophyceae_XXX | 2.671066 | 1.521996e-06 |
| JQ782085.1.1735_U | MAST-1C | 3.151734 | 2.017583e-06 |
| KJ763648.1.1765_U | Cryptomonadales_XX | 2.705738 | 2.033536e-06 |
| AB363063.1.1731_U | Olpidiopsis | 3.351445 | 2.358088e-06 |
| JX178886.1.1761_U | Tintinnopsis_07 | 2.752226 | 2.879685e-06 |
| FN263276.1.1796_U | Chrysochromulina | 3.113689 | 2.998362e-06 |
| AF290540.1.1819_U | Protaspa | 2.872285 | 2.998362e-06 |
| AF372763.1.1585_U | Haliphthorales_X | 3.369392 | 3.339443e-06 |
| EF100270.1.1404_U | Oblongichytrium | 2.952638 | 3.374036e-06 |
| KC753491.1.1723_U | Dysteria | 3.050703 | 5.062278e-06 |
| EF527109.1.1755_U | Ceratium | 3.020519 | 5.271572e-06 |
| JN090866.1.1795_U | Kathablepharidida_XX | 3.040629 | 5.271572e-06 |
| AJ535168.1.1802_U | Skeletonema | 2.969831 | 5.271572e-06 |
| AY129058.1.1785_U | Dino-Group-I-Clade-4_X | 2.482761 | 5.718221e-06 |
| KU715759.1.1764_U | Eutintinnus | 2.976138 | 6.266128e-06 |
| AB178865.1.1782_U | Haliphthoros | 3.337262 | 1.028597e-05 |
| U53128.1.1747_U | Rhodomonas | 2.900311 | 1.028597e-05 |
| AF427534.1.1778_U | Polysiphonia | 3.806366 | 1.143176e-05 |
| AJ506972.1.1802_U | Dinophysis | 3.163266 | 1.253527e-05 |
| HM369786.1.1085_U | Dino-Group-II_XX | 3.173840 | 1.371587e-05 |
| AHAL01000301.1474.3265_U | Emiliania | 3.182640 | 1.386124e-05 |
| AF274267.1.1752_U | Heterocapsa | 2.978867 | 1.471228e-05 |
| AY641564.1.1799_U | Alexandrium | 2.623470 | 1.756289e-05 |
| EF492490.1.1787_U | Pelagodinium | 2.972916 | 1.846733e-05 |
| KJ759274.1.1796_U | Dino-Group-II-Clade-2_X | 3.268615 | 1.953564e-05 |
| AB558956.1.1768_U | Mataza | 3.183349 | 2.290759e-05 |
| KC488352.1.1657_U | Pyramimonas | 2.835315 | 2.350127e-05 |
| FR874444.1.1814_U | MAST-12B | 3.002991 | 2.613072e-05 |
| AF022199.1.1803_U | Lepidodinium | 2.739249 | 3.989044e-05 |
| DQ310262.1.1409_U | MAST-6_X | 2.590359 | 4.090375e-05 |
| KC488480.1.1694_U | Dino-Group-II-Clade-1_X | 3.013341 | 4.516952e-05 |
| AB284572.1.1791_U | Lagenidium | 3.844777 | 5.852950e-05 |
| U33137.1.1775_U | Schottera | 3.167806 | 7.155069e-05 |
| EU418969.1.1590_U | Pentapharsodinium | 3.386642 | 7.192882e-05 |
| KJ763136.1.1804_U | Picozoa_XXXXX | 2.724075 | 7.458363e-05 |
| MK629451.1.1737_U | Syltodinium | 3.212401 | 7.458363e-05 |
| GQ375265.1.1724_U | Baffinella | 3.497448 | 8.729904e-05 |
| HG792066.1.1727_U | Ansanella | 2.195326 | 8.729904e-05 |
| AY179974.1.1730_U | Gregarinomorphea_X_GRE1_XX | 2.932607 | 1.308539e-04 |
| FR874328.1.1775_U | Bathycoccus | 2.803300 | 1.335248e-04 |
| LC322144.1.1754_U | Quadricilia | 3.981005 | 1.576621e-04 |
| AB902542.1.1694_U | PHYLL_2_X | 3.362731 | 1.639908e-04 |
| FJ832156.1.1753_U | Lecudina | 3.898038 | 1.646623e-04 |
| KJ760791.1.1789_U | Dino-Group-I-Clade-1_X | 3.505662 | 1.934064e-04 |
| KJ925256.1.1318_U | Suctoria_XX | 3.772774 | 2.156799e-04 |
| JX188322.1.1754_U | MAST-9D | 2.846355 | 2.156799e-04 |
| JN848814.1.1651_U | MAST-3J | 2.786115 | 2.156799e-04 |
| AF022192.1.1803_U | Tripos | 2.954461 | 2.668074e-04 |
| KC315810.1.1645_U | Vampyrellida_XX | 3.425579 | 4.223104e-04 |
| LC222307.1.1739_U | Spiniferites | 3.085483 | 4.223104e-04 |
| AY620311.1.1258_U | Filosa-Thecofilosea_XXX | 3.422152 | 4.384032e-04 |
| AF236793.1.1776_U | Ceramium | 2.929391 | 4.432661e-04 |
| KJ925383.1.1505_U | Thoracosphaeraceae_X | 3.792743 | 4.628649e-04 |
| FJ947040.1.1779_U | Warnowia | 3.083876 | 4.910898e-04 |
| AB698451.1.1802_U | Gymnoxanthella | 3.398731 | 4.910898e-04 |
| AM408889.1.1796_U | Paragymnodinium | 3.439374 | 4.910898e-04 |
| MH319376.1.1669_U | Frontonia | 3.087373 | 4.910898e-04 |
| EF492493.1.1788_U | Biecheleriopsis | 2.936012 | 4.910898e-04 |
| KF130175.1.1772_U | Haptophyta_Clade_HAP3_XXX | 3.860196 | 4.910898e-04 |
| AB622340.1.1685_U | op14-lineage_X | 4.236172 | 4.991459e-04 |
| JQ781913.1.1798_U | MAST-7A | 3.878236 | 5.059793e-04 |
| FJ868202.1.1749_U | Frontonia_1 | 2.849783 | 5.112764e-04 |
| AB695524.1.1779_U | Paulinella | 3.444001 | 5.331051e-04 |
| AF427531.1.1778_U | Neosiphonia | 3.265927 | 5.439223e-04 |
| KJ925201.1.1629_U | Askenasia | 3.093307 | 5.917195e-04 |
| JF794039.1.1605_U | Navicula | 2.715644 | 6.007697e-04 |
| JX967270.1.1555_U | Polykrikos | 3.340932 | 6.653621e-04 |
| KX906568.1.1634_U | Synophrya | 3.693041 | 6.772565e-04 |
| AY598674.1.1791_U | Pythium | 2.927542 | 6.772565e-04 |
| JQ782065.1.1805_U | MAST-4D | 3.647399 | 6.772565e-04 |
| HM749950.1.1540_U | Parmales_env_3A | 3.568999 | 6.772565e-04 |
| EF100331.1.1380_U | Polyplicarium | 3.351292 | 6.772565e-04 |
| EU718682.1.1770_U | Callithamnion | 2.811049 | 6.906068e-04 |
| FN263279.1.1764_U | Plagioselmis | 3.725092 | 7.171082e-04 |
| KC779515.1.1801_U | Thalassomyxa | 2.860095 | 7.650172e-04 |
| JQ904059.1.1656_U | Chlamydodon | 2.156034 | 7.916445e-04 |
| AB252760.1.1723_U | NC12B-lineage_X | 3.886625 | 7.933883e-04 |
| KC771153.1.1815_U | CCW10-lineage_X | 2.643843 | 8.494295e-04 |
| FJ865206.1.1693_U | Acineta | 4.120701 | 8.513066e-04 |
| LC189150.1.1713_U | Gephyrocapsa | 3.072182 | 8.723554e-04 |
| AJ561113.1.1822_U | Pirsonia | 4.358368 | 8.841337e-04 |
| EF527182.1.1579_U | TAGIRI1-lineage_X | 2.157436 | 8.841337e-04 |
| EU371397.1.1818_U | Ventricleftida_XX | 3.136556 | 9.035933e-04 |
dge.3 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 19
knitr::kable(as.matrix(dge.3 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logFC, FDR)))
| Genus | logFC | FDR | |
|---|---|---|---|
| AY543062.1.1792_U | Mychonastes | -4.752675 | 4.054296e-07 |
| KC875350.1.1077_U | Dunaliella | -6.939005 | 1.301870e-06 |
| FJ946883.1.1793_U | Chlorella | -5.908957 | 1.453780e-06 |
| DQ079859.1.1794_U | Tisochrysis | -6.280275 | 2.017583e-06 |
| KY054986.1.1708_U | Nannochloropsis | -5.667449 | 5.271572e-06 |
| MT901380.1.1779_U | Parietochloris | -5.665076 | 6.930783e-06 |
| X68484.4.1753_U | Scherffelia | -5.042462 | 7.049004e-06 |
| AB080302.1.1788_U | Nannochloris | -5.024995 | 1.352865e-05 |
| KY054995.1.1744_U | Tetraselmis | -4.625811 | 3.035605e-05 |
| AB058309.1.1749_U | Picochlorum | -4.722150 | 3.258877e-05 |
| KM020184.1.1749_U | Stichococcus | -4.670212 | 6.100902e-05 |
| MT425950.1.1793_U | Muriella | -4.677366 | 7.155069e-05 |
| AY121853.1.1913_U | Vannella | -5.909288 | 1.308539e-04 |
| FJ810216.1.1797_U | Aplanochytrium | -4.402574 | 1.387752e-04 |
| EF527133.16.1840_U | Chlorodendrales_XX | -3.637523 | 2.330843e-04 |
| GQ465466.1.1758_U | Uronema | -4.132117 | 2.538969e-04 |
| JF790981.1.1777_U | Chlorellales_XX | -3.131270 | 2.668074e-04 |
| JQ315589.1.1670_U | Scenedesmus | -5.523654 | 2.937352e-04 |
| AF278744.1.2559_R | Koliella | -3.619979 | 4.262408e-04 |
res_sig <- dge.3 %>% filter(FDR<0.001)
pdf(paste0(wdir, "fig/edger-oceanVSinoculum-scatter-genus-18s.pdf"), width = 15, height = 13)
ggplot(res_sig, aes(x=Genus, y=logFC, color=Subdivision)) +
geom_jitter(size=3, width = 0.2) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png
## 2
Linear time course trends under light of genera with edgeR (minimap2)
# time course trend analysis
library(splines)
meta.light <- meta.sub %>% filter(Culture_setup == "light.clean_gas" | Culture_setup == "light.used_gas" | Culture_setup == "ocean") %>% filter(Inoculum != "Hornbaek.shallow")
X <- ns(meta.light$Time_point, df=1)
design.light <- model.matrix(~X + Inoculum, data=meta.light)
biom.18s.m.light <- prune_samples(sample_names(biom.18s.m.genus) %in% c("flaregas-4","flaregas-5","flaregas-6","flaregas-7","flaregas-8","flaregas-9","flaregas-10","flaregas-11","flaregas-12","flaregas-13","flaregas-14","flaregas-15","flaregas-16","flaregas-17","flaregas-18","flaregas-19","flaregas-20"), biom.18s.m.genus)
er <- phyloseq2edgeR(biom.18s.m.light)
er <- edgeR::calcNormFactors(er, method = "TMM")
er <- edgeR::estimateDisp(er, design.light)
plotBCV(er)
sqrt(er$common.dispersion)
## [1] 0.9885399
fit <- glmQLFit(er, design.light, robust = TRUE)
plotQLDisp(fit)
qlf <- glmQLFTest(fit, coef=2)
summary(decideTests(qlf, p.value=0.01))
## X
## Down 160
## NotSig 779
## Up 30
# X
#Down 138
#NotSig 798
#Up 33
#filter genera that where detected with Emu (filter low abundant genera)
dge.4 <- topTags(qlf, n = Inf, p.value = 1)$table
dge.4 <- dge.4[biom.18s.genus.minimap2emu$SpeciesID,]
# visualize the fitted spline curves for the top four enriched species under cultivation conditions
dge.4.u <- dge.4 %>% filter(FDR<0.01 & logFC > 1)
dge.4.u %>% nrow()
## [1] 11
logCPM.obs <- cpm(er, log=TRUE, prior.count=qlf$prior.count)
logCPM.fit <- cpm(qlf, log=TRUE)
pdf(paste0(wdir, "fig/edger-time-course-light-genus-18s-1.pdf"), width = 15, height = 15)
par(mfrow=c(2,6))
for(i in 1:nrow(dge.4.u)) {
FlybaseID <- row.names(dge.4.u)[i]
Symbol <- dge.4.u$Genus[i]
logCPM.obs.i <- logCPM.obs[FlybaseID,]
logCPM.fit.i <- logCPM.fit[FlybaseID,]
plot(meta.light$Time_point, logCPM.obs.i, ylab="log-CPM", xlab="Time point", main=Symbol, pch=16)
lines(meta.light$Time_point, logCPM.fit.i, col="red", lwd=2)
}
dev.off()
## png
## 2
Linear time course trends in the dark of genera with edgeR (minimap2)
# time course trend analysis
library(splines)
meta.dark <- meta.sub %>% filter(Culture_setup == "dark.clean_gas" | Culture_setup == "ocean") %>% filter(Inoculum != "Helsingor")
X <- ns(meta.dark$Time_point, df=1)
design.dark <- model.matrix(~X + Inoculum, data=meta.dark)
biom.18s.m.dark <- prune_samples(sample_names(biom.18s.m.genus) %in% c("flaregas-11","flaregas-12","flaregas-21","flaregas-22","flaregas-23","flaregas-24","flaregas-25","flaregas-26","flaregas-27","flaregas-28","flaregas-29","flaregas-30"), biom.18s.m.genus)
er <- phyloseq2edgeR(biom.18s.m.dark)
er <- edgeR::calcNormFactors(er, method = "TMM")
er <- edgeR::estimateDisp(er, design.dark)
plotBCV(er)
sqrt(er$common.dispersion)
## [1] 0.9729365
fit <- glmQLFit(er, design.dark, robust = TRUE)
plotQLDisp(fit)
qlf <- glmQLFTest(fit, coef=2)
summary(decideTests(qlf, p.value=0.01))
## X
## Down 21
## NotSig 922
## Up 26
# X
#Down 4
#NotSig 942
#Up 23
#filter genera that where detected with Emu (filter low abundant genera)
dge.5 <- topTags(qlf, n = Inf, p.value = 1)$table
dge.5 <- dge.5[biom.18s.genus.minimap2emu$SpeciesID,]
# visualize the fitted spline curves for the top four enriched species under cultivation conditions
dge.5.u <- dge.5 %>% filter(FDR<0.01 & logFC > 1)
dge.5.u %>% nrow()
## [1] 25
logCPM.obs <- cpm(er, log=TRUE, prior.count=qlf$prior.count)
logCPM.fit <- cpm(qlf, log=TRUE)
pdf(paste0(wdir, "fig/edger-time-course-dark-genus-18s-up-1.pdf"), width = 15, height = 15)
par(mfrow=c(3,5))
for(i in 1:15) {
FlybaseID <- row.names(dge.5.u)[i]
Symbol <- dge.5.u$Genus[i]
logCPM.obs.i <- logCPM.obs[FlybaseID,]
logCPM.fit.i <- logCPM.fit[FlybaseID,]
plot(meta.dark$Time_point, logCPM.obs.i, ylab="log-CPM", xlab="Time point", main=Symbol, pch=16)
lines(meta.dark$Time_point, logCPM.fit.i, col="red", lwd=2)
}
dev.off()
## png
## 2
pdf(paste0(wdir, "fig/edger-time-course-dark-genus-18s-up-2.pdf"), width = 15, height = 15)
par(mfrow=c(2,5))
for(i in 16:nrow(dge.5.u)) {
FlybaseID <- row.names(dge.5.u)[i]
Symbol <- dge.5.u$Genus[i]
logCPM.obs.i <- logCPM.obs[FlybaseID,]
logCPM.fit.i <- logCPM.fit[FlybaseID,]
plot(meta.dark$Time_point, logCPM.obs.i, ylab="log-CPM", xlab="Time point", main=Symbol, pch=16)
lines(meta.dark$Time_point, logCPM.fit.i, col="red", lwd=2)
}
dev.off()
## png
## 2
Compare results of DESeq2 and edgeR on genus level
alpha <- 0.001
# test differential expression: light.clean_gas - dark.clean_gas
gene_list <- list(
DESeq2 = res.1 %>% as.data.frame %>% dplyr::filter(padj < alpha & log2FoldChange<1) %>% rownames,
edgeR = dge.1 %>% dplyr::filter(FDR < alpha & logFC<1) %>% rownames
)
#venn.diagram(x = gene_list, filename = "venn.light-cleanVSdark-clean.down.tiff")
p.a <- venn(gene_list)
gene_list <- list(
DESeq2 = res.1 %>% as.data.frame %>% dplyr::filter(padj < alpha & log2FoldChange>1) %>% rownames,
edgeR = dge.1 %>% dplyr::filter(FDR < alpha & logFC>1) %>% rownames
)
p.b <- venn(gene_list)
Compare light_clean_usedVSdark results of ALDEX and edgeR on genus level
alpha <- 0.001
# test differential expression: (light.clean_gas + light.used_gas) - dark.clean_gas
gene_list <- list(
aldex = glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1,] %>% row.names(),
edgeR = dge.2 %>% filter(FDR < alpha & logFC > 1) %>% rownames
)
p.a <- venn(gene_list)
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[attr(p.a,"intersections")$aldex,]))
gene_list <- list(
aldex = glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1,] %>% row.names(),
edgeR = dge.2 %>% filter(FDR < alpha & logFC < 1) %>% rownames
)
p.a <- venn(gene_list)
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[attr(p.a,"intersections")$aldex,]))
Compare results of ocean VS inoculum and time course tends in edgeR on genus level
alpha <- 0.001
# test differential expression: ocean - (light.clean_gas + light.used_gas + dark.clean_gas)/3
#light
gene_list <- list(
edgeR.contrast = dge.3 %>% filter(FDR < alpha & logFC > 1) %>% rownames,
edgeR.splines = dge.4 %>% filter(FDR < alpha & logFC < 1) %>% rownames
)
p.a <- venn(gene_list)
gene_list <- list(
edgeR.contrast = dge.3 %>% filter(FDR < alpha & logFC < 1) %>% rownames,
edgeR.splines = dge.4 %>% filter(FDR < alpha & logFC > 1) %>% rownames
)
p.b <- venn(gene_list)
#dark
gene_list <- list(
edgeR.contrast = dge.3 %>% filter(FDR < alpha & logFC > 1) %>% rownames,
edgeR.splines = dge.5 %>% filter(FDR < alpha & logFC < 1) %>% rownames
)
p.c <- venn(gene_list)
gene_list <- list(
edgeR.contrast = dge.3 %>% filter(FDR < alpha & logFC < 1) %>% rownames,
edgeR.splines = dge.5 %>% filter(FDR < alpha & logFC > 1) %>% rownames
)
p.d <- venn(gene_list)
Semi-Parametric Rank-based approach for INference in Graphical model (SPRING)
#https://github.com/stefpeschel/NetCoMi
library(NetCoMi)
## Loading required package: SpiecEasi
##
## Attaching package: 'SpiecEasi'
## The following object is masked from 'package:MASS':
##
## fitdistr
##
net_spring <- netConstruct(biom.18s,
filtTax = "highestFreq",
filtTaxPar = list(highestFreq = 50),
measure = "spring",
measurePar = list(nlambda=50,
rep.num=50,
Rmethod = "approx"),
normMethod = "none",
zeroMethod = "none",
sparsMethod = "none",
dissFunc = "signed",
verbose = 3,
seed = 123456)
## Checking input arguments ...
## Done.
## Data filtering ...
## 967 taxa removed.
## 50 taxa and 27 samples remaining.
##
## Calculate 'spring' associations ...
## Registered S3 method overwritten by 'dendextend':
## method from
## rev.hclust vegan
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust vegan
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_spring$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
xlab = "Estimated correlation",
main = "Estimated correlations after sparsification")
net_spring_unsigned <- netConstruct(data = net_spring$assoEst1,
dataType = "correlation",
sparsMethod = "threshold",
dissFunc = "unsigned",
verbose = 3)
## Checking input arguments ... Done.
##
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_spring_unsigned$adjaMat1, 100, ylim = c(0, 400),
xlab = "Adjacency values",
main = "Adjacencies (with \"unsigned\" transformation)")
temp.nw <- net_spring
props_spring <- netAnalyze(temp.nw,
centrLCC = TRUE,
clustMethod = "cluster_fast_greedy",
hubPar = "eigenvector",
weightDeg = FALSE, normDeg = FALSE)
#?summary.microNetProps
print(summary(props_spring, numbNodes = 5L))
##
## Component sizes
## ```````````````
## size: 50
## #: 1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##
## Number of components 1.00000
## Clustering coefficient 0.19151
## Modularity 0.55602
## Positive edge percentage 81.25000
## Edge density 0.06531
## Natural connectivity 0.02568
## Vertex connectivity 1.00000
## Edge connectivity 1.00000
## Average dissimilarity* 0.97854
## Average path length** 2.70507
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
##
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ````````````````````````````````
##
## name: 1 2 3 4 5 6 7
## #: 7 7 7 9 6 5 9
##
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````
## 141540
## 153179
## 24038
##
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##
## 153179 8
## 141540 6
## 163227 5
## 135033 5
## 163252 5
##
## Betweenness centrality (normalized):
##
## 162601 0.16497
## 163227 0.16327
## 153179 0.15051
## 140161 0.15051
## 40675 0.13520
##
## Closeness centrality (normalized):
##
## 153179 0.64102
## 141540 0.62472
## 40675 0.60451
## 140161 0.58343
## 163227 0.57828
##
## Eigenvector centrality (normalized):
##
## 153179 1.00000
## 141540 0.91042
## 24038 0.59247
## 6548 0.56813
## 40675 0.52545
pdf(paste0(wdir, "fig/network-spring-18s.pdf"), width = 30, height = 15)
p <- plot(props_spring,
edgeInvisFilter = "threshold",
edgeInvisPar = 0.01,
nodeColor = "cluster",
nodeSize = "eigenvector",
repulsion = 0.8,
rmSingles = TRUE,
labelScale = FALSE,
cexLabels = 1.6,
nodeSizeSpread = 3,
cexNodes = 2,
hubBorderCol = "darkgray",
title1 = "Network on species level with SPRING associations",
showTitle = TRUE,
cexTitle = 2.3)
legend(0.57, 1.1, cex = 2.2, title = "estimated association:",
legend = c("+","-"), lty = 1, lwd = 3, col = c("#009900","red"),
bty = "n", horiz = FALSE)
dev.off()
## png
## 2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
## 75%
## 0.342628
biom.18s.genus.top50 <- prune_taxa(names(sort(taxa_sums(biom.18s.genus), decreasing = TRUE)[1:50]), biom.18s.genus)
tax_table(biom.18s.genus.top50) <- tax_table(biom.18s.genus.top50) %>% as.data.frame() %>% mutate(Genus = str_remove(Genus, "_X*$")) %>% as.matrix()
net_spring_genus <- netConstruct(biom.18s.genus.top50,
#filtTax = "highestFreq",
#filtTaxPar = list(highestFreq = 50),
taxRank = "Genus",
measure = "spring",
measurePar = list(nlambda=50,
rep.num=50,
Rmethod = "approx"),
normMethod = "none",
zeroMethod = "none",
sparsMethod = "none",
dissFunc = "signed",
verbose = 3,
seed = 123456)
## Checking input arguments ... Done.
## 50 taxa and 27 samples remaining.
##
## Calculate 'spring' associations ...
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_spring_genus$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
xlab = "Estimated correlation",
main = "Estimated correlations after sparsification")
net_spring_genus_unsigned <- netConstruct(data = net_spring_genus$assoEst1,
dataType = "correlation",
sparsMethod = "threshold",
thresh = 0.3,
dissFunc = "unsigned",
verbose = 3)
## Checking input arguments ... Done.
##
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_spring_genus_unsigned$adjaMat1, 100, ylim = c(0, 400),
xlab = "Adjacency values",
main = "Adjacencies (with \"unsigned\" transformation)")
temp.nw <- net_spring_genus
props_spring_genus <- netAnalyze(temp.nw,
centrLCC = TRUE,
clustMethod = "cluster_fast_greedy",
#hubPar = "eigenvector",
hubPar = "betweenness",
hubQuant = 0.9,
weightDeg = FALSE, normDeg = FALSE)
#?summary.microNetProps
print(summary(props_spring_genus, numbNodes = 5L))
##
## Component sizes
## ```````````````
## size: 50
## #: 1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##
## Number of components 1.00000
## Clustering coefficient 0.21232
## Modularity 0.56673
## Positive edge percentage 83.72093
## Edge density 0.07020
## Natural connectivity 0.02633
## Vertex connectivity 1.00000
## Edge connectivity 1.00000
## Average dissimilarity* 0.97610
## Average path length** 2.63212
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
##
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ````````````````````````````````
##
## name: 1 2 3 4 5 6
## #: 9 9 12 6 9 5
##
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````
## Caecitellus
## Heterocapsa
## Protaspa-lineage
## Rimostrombidium
## Telonemia-Group-1
##
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##
## Rimostrombidium 8
## Philasterida 6
## Telonemia-Group-1 6
## Lepidodinium 5
## Aspidisca 5
##
## Betweenness centrality (normalized):
##
## Rimostrombidium 0.27296
## Protaspa-lineage 0.24490
## Telonemia-Group-1 0.20238
## Caecitellus 0.17687
## Heterocapsa 0.16922
##
## Closeness centrality (normalized):
##
## Rimostrombidium 0.65456
## Caecitellus 0.62236
## Telonemia-Group-1 0.58551
## Caecitellaceae 0.57356
## Protaspa-lineage 0.56798
##
## Eigenvector centrality (normalized):
##
## Rimostrombidium 1.00000
## Novel-Gran-2 0.87898
## Spumella 0.84978
## Philasterida 0.81381
## Aspidisca 0.80364
pdf(paste0(wdir, "fig/network-spring-genus-18s.pdf"), width = 30, height = 15)
p <- plot(props_spring_genus,
edgeInvisFilter = "threshold",
edgeInvisPar = 0.01,
nodeColor = "cluster",
colorVec = c("#E69F00", "#56B4E9", "#4daf4a", "#009960", "#CC79A7", "#804000", "#999999"),
nodeSize = "eigenvector",
repulsion = 0.8,
rmSingles = TRUE,
labelScale = FALSE,
cexLabels = 2.4,
nodeSizeSpread = 3,
cexNodes = 2,
hubBorderCol = "darkgray",
#title1 = "Network on species level with SPRING associations",
showTitle = FALSE,
#cexTitle = 2.3,
posCol = "#0072B2",
negCol = '#e41a1c')
legend(0.75, 1.1, cex = 2.4, title = "estimated association:",
legend = c("+","-"), lty = 1, lwd = 3, col = c("#0072B2", '#e41a1c'),
bty = "n", horiz = FALSE)
dev.off()
## png
## 2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
## 75%
## 0.3820446
knitr::kable(as.matrix(sort(colSums(net_spring_genus$normCounts1), decreasing = TRUE)[1:20]))
| Picochlorum | 693010.57 |
| Tetraselmis | 225314.66 |
| Nannochloris | 202237.24 |
| Chlamydomonas | 194650.26 |
| Aspidisca | 115217.73 |
| Rimostrombidium | 111952.60 |
| Chlamydomonadales | 67824.76 |
| Bicoecea | 59376.22 |
| Spumella | 35457.50 |
| Paraphysomonas | 34067.75 |
| Chlorodendrales | 33343.15 |
| Chlamydodon | 31290.70 |
| Heterocapsa | 31027.28 |
| Philasterida | 29921.38 |
| Lepidodinium | 24603.54 |
| Caecitellus | 22679.89 |
| Aplanochytrium | 21554.37 |
| Novel-Gran-2 | 19322.87 |
| Dino-Group-II-Clade-2 | 13027.69 |
| Alexandrium | 11021.24 |
SPRING for time point 11 hours
#https://github.com/stefpeschel/NetCoMi
library(NetCoMi)
biom.18s.genus.top50.11hr <- prune_samples(sample_data(biom.18s.genus.top50) %>% as.data.frame() %>% as.matrix() %>% as.data.frame() %>% filter(Time_point == 11) %>% rownames(), biom.18s.genus.top50)
net_spring_genus.11 <- netConstruct(biom.18s.genus.top50.11hr,
#filtTax = "highestFreq",
#filtTaxPar = list(highestFreq = 50),
taxRank = "Genus",
measure = "spring",
measurePar = list(nlambda=50,
rep.num=50,
Rmethod = "approx"),
normMethod = "none",
zeroMethod = "none",
sparsMethod = "none",
dissFunc = "signed",
verbose = 3,
seed = 123456)
## Checking input arguments ... Done.
## 50 taxa and 8 samples remaining.
##
## Calculate 'spring' associations ...
## Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_spring_genus.11$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
xlab = "Estimated correlation",
main = "Estimated correlations after sparsification")
temp.nw <- net_spring_genus.11
props_spring_genus.11 <- netAnalyze(temp.nw,
centrLCC = TRUE,
clustMethod = "cluster_fast_greedy",
hubPar = "eigenvector",
weightDeg = FALSE, normDeg = FALSE)
#?summary.microNetProps
print(summary(props_spring_genus.11, numbNodes = 5L))
##
## Component sizes
## ```````````````
## size: 23 6 3 2 1
## #: 1 1 3 1 10
## ______________________________
## Global network properties
## `````````````````````````
## Largest connected component (LCC):
##
## Relative LCC size 0.46000
## Clustering coefficient 0.00000
## Modularity 0.60744
## Positive edge percentage 77.27273
## Edge density 0.08696
## Natural connectivity 0.05491
## Vertex connectivity 1.00000
## Edge connectivity 1.00000
## Average dissimilarity* 0.97303
## Average path length** 3.39738
##
## Whole network:
##
## Number of components 16.00000
## Clustering coefficient 0.22232
## Modularity 0.77006
## Positive edge percentage 86.11111
## Edge density 0.02939
## Natural connectivity 0.02327
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
##
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ````````````````````````````````
##
## name: 0 1 2 3 4 5 6 7 8 9 10
## #: 10 6 5 5 5 4 4 3 3 3 2
##
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````
## Chlorodendrales
## Nannochloris
## Spumella
##
## ______________________________
## Centrality measures
## - In decreasing order
## - Centrality of disconnected components is zero
## ````````````````````````````````````````````````
## Degree (unnormalized):
##
## Aspidisca 3
## Strombidium 3
## Flabellula 3
## Chlorodendrales 3
## Nannochloris 3
##
## Betweenness centrality (normalized):
##
## Spumella 0.68831
## Chlorodendrales 0.53680
## Aspidisca 0.43723
## Nannochloris 0.38528
## Paraphysomonas 0.36797
##
## Closeness centrality (normalized):
##
## Spumella 0.58881
## Chlorodendrales 0.57152
## Aspidisca 0.54571
## Nannochloris 0.53206
## Paraphysomonas 0.51210
##
## Eigenvector centrality (normalized):
##
## Chlorodendrales 1.00000
## Spumella 0.99112
## Nannochloris 0.87986
## Aspidisca 0.80774
## Paraphysomonas 0.63081
pdf(paste0(wdir, "fig/network-spring-genus-11hr-18s.pdf"), width = 30, height = 15)
p <- plot(props_spring_genus.11,
edgeInvisFilter = "threshold",
edgeInvisPar = 0.01,
nodeColor = "cluster",
colorVec = c("#804000", "#56B4E9", "#009960", "#999999", "#357090", "#CC79A7", "#E69F00"),
nodeSize = "eigenvector",
repulsion = 0.8,
rmSingles = TRUE,
labelScale = FALSE,
cexLabels = 1.6,
nodeSizeSpread = 3,
cexNodes = 2,
hubBorderCol = "darkgray",
#title1 = "Network on genus level with Pearson correlations",
showTitle = FALSE,
#cexTitle = 2.3,
posCol = "#0072B2",
negCol = '#e41a1c')
legend(0.75, 1.1, cex = 2.2, title = "estimated association:",
legend = c("+","-"), lty = 1, lwd = 3, col = c("#0072B2", '#e41a1c'),
bty = "n", horiz = FALSE)
dev.off()
## png
## 2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
## 75%
## 0.3415613
knitr::kable(as.matrix(sort(colSums(net_spring_genus.11$normCounts1), decreasing = TRUE)[1:20]))
| Picochlorum | 263986.958 |
| Nannochloris | 137097.613 |
| Chlamydomonas | 126936.645 |
| Tetraselmis | 69974.078 |
| Chlamydomonadales | 57486.777 |
| Aspidisca | 50485.400 |
| Paraphysomonas | 22702.979 |
| Chlorodendrales | 17430.882 |
| Novel-Gran-2 | 13658.268 |
| Rimostrombidium | 4439.017 |
| Aplanochytrium | 3115.442 |
| Perkinsida | 2678.884 |
| Bicoecea | 1996.169 |
| Vannella | 1939.600 |
| Philasterida | 1936.981 |
| Chlorococcum | 1914.004 |
| Euplotes | 1399.536 |
| Flabellula | 1373.835 |
| Heterocapsa | 1225.493 |
| Chlamydodon | 1103.903 |
Pearson’s correlation
#While with the “signed” transformation, positive correlated taxa are likely to belong to the same cluster, with the “unsigned” transformation clusters contain strongly positive and negative correlated taxa.
net_pears <- netConstruct(biom.18s,
filtTax = "highestFreq",
filtTaxPar = list(highestFreq = 50),
measure = "pearson",
normMethod = "clr",
zeroMethod = "multRepl",
sparsMethod = "threshold",
thresh = 0.3,
dissFunc = "signed",
verbose = 3)
## Checking input arguments ... Done.
## Data filtering ...
## 967 taxa removed.
## 50 taxa and 27 samples remaining.
##
## Zero treatment:
## Execute multRepl() ... Done.
##
## Normalization:
## Execute clr(){SpiecEasi} ... Done.
##
## Calculate 'pearson' associations ... Done.
##
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_pears$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
xlab = "Estimated correlation",
main = "Estimated correlations after sparsification")
net_pears_unsigned <- netConstruct(data = net_pears$assoEst1,
dataType = "correlation",
sparsMethod = "threshold",
thresh = 0.3,
dissFunc = "unsigned",
verbose = 3)
## Checking input arguments ... Done.
##
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_pears_unsigned$adjaMat1, 100, ylim = c(0, 400),
xlab = "Adjacency values",
main = "Adjacencies (with \"unsigned\" transformation)")
temp.nw <- net_pears
props_pears <- netAnalyze(temp.nw,
centrLCC = TRUE,
clustMethod = "cluster_fast_greedy",
hubPar = "eigenvector",
weightDeg = FALSE, normDeg = FALSE
)
#?summary.microNetProps
print(summary(props_pears, numbNodes = 5L))
##
## Component sizes
## ```````````````
## size: 50
## #: 1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##
## Number of components 1.00000
## Clustering coefficient 0.71660
## Modularity 0.03747
## Positive edge percentage 42.19653
## Edge density 0.56490
## Natural connectivity 0.16225
## Vertex connectivity 5.00000
## Edge connectivity 5.00000
## Average dissimilarity* 0.81463
## Average path length** 1.03688
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
##
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ````````````````````````````````
##
## name: 1 2 3
## #: 13 23 14
##
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````
## 141540
## 153179
## 156707
##
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##
## 203538 40
## 160777 39
## 162601 38
## 163392 37
## 159937 36
##
## Betweenness centrality (normalized):
##
## 45558 0.06548
## 203538 0.02466
## 82603 0.02211
## 159937 0.01871
## 162601 0.01701
##
## Closeness centrality (normalized):
##
## 141540 1.56384
## 153179 1.54227
## 156707 1.49398
## 68906 1.44441
## 36687 1.43924
##
## Eigenvector centrality (normalized):
##
## 141540 1.00000
## 156707 0.97978
## 153179 0.96949
## 68906 0.96553
## 156946 0.95502
pdf(paste0(wdir, "fig/network-pearson-18s.pdf"), width = 30, height = 15)
p <- plot(props_pears,
edgeInvisFilter = "threshold",
edgeInvisPar = 0.001,
nodeColor = "cluster",
nodeSize = "eigenvector",
repulsion = 0.8,
rmSingles = TRUE,
labelScale = FALSE,
cexLabels = 1.6,
nodeSizeSpread = 3,
cexNodes = 2,
hubBorderCol = "darkgray",
title1 = "Network on species level with Pearson correlations",
showTitle = TRUE,
cexTitle = 2.3)
legend(0.57, 1.1, cex = 2.2, title = "estimated association:",
legend = c("+","-"), lty = 1, lwd = 3, col = c("#009900","red"),
bty = "n", horiz = FALSE)
dev.off()
## png
## 2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
## [1] 0.6214397
biom.18s.genus.top50 <- prune_taxa(names(sort(taxa_sums(biom.18s.genus), decreasing = TRUE)[1:50]), biom.18s.genus)
tax_table(biom.18s.genus.top50) <- tax_table(biom.18s.genus.top50) %>% as.data.frame() %>% mutate(Genus = str_remove(Genus, "_X*$")) %>% as.matrix()
net_pears_genus <- netConstruct(biom.18s.genus.top50,
taxRank = "Genus",
measure = "pearson",
zeroMethod = "multRepl",
normMethod = "clr",
sparsMethod = "threshold",
thresh = 0.3,
verbose = 3)
## Checking input arguments ... Done.
## 50 taxa and 27 samples remaining.
##
## Zero treatment:
## Execute multRepl() ... Done.
##
## Normalization:
## Execute clr(){SpiecEasi} ... Done.
##
## Calculate 'pearson' associations ... Done.
##
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_pears_genus$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
xlab = "Estimated correlation",
main = "Estimated correlations after sparsification")
net_pears_genus_unsigned <- netConstruct(data = net_pears_genus$assoEst1,
dataType = "correlation",
sparsMethod = "threshold",
thresh = 0.3,
dissFunc = "unsigned",
verbose = 3)
## Checking input arguments ... Done.
##
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_pears_genus_unsigned$adjaMat1, 100, ylim = c(0, 400),
xlab = "Adjacency values",
main = "Adjacencies (with \"unsigned\" transformation)")
temp.nw <- net_pears_genus
props_pears_genus <- netAnalyze(temp.nw,
centrLCC = TRUE,
clustMethod = "cluster_fast_greedy",
hubPar = "eigenvector",
weightDeg = FALSE, normDeg = FALSE)
#?summary.microNetProps
print(summary(props_pears_genus, numbNodes = 5L))
##
## Component sizes
## ```````````````
## size: 50
## #: 1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##
## Number of components 1.00000
## Clustering coefficient 0.72005
## Modularity 0.06084
## Positive edge percentage 46.69540
## Edge density 0.56816
## Natural connectivity 0.18639
## Vertex connectivity 6.00000
## Edge connectivity 6.00000
## Average dissimilarity* 0.80731
## Average path length** 1.03226
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
##
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ````````````````````````````````
##
## name: 1 2 3 4
## #: 12 7 17 14
##
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````
## MAST-12A
## Strombidiidae
## Teleaulax
##
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##
## Strombidiidae 42
## Teleaulax 40
## Strombidium 38
## Dinophyceae 37
## Ostreococcus 37
##
## Betweenness centrality (normalized):
##
## Chlorococcum 0.03827
## Nannochloris 0.02806
## Strombidiidae 0.02381
## Lepidodinium 0.02296
## Dino-Group-I-Clade-4 0.02041
##
## Closeness centrality (normalized):
##
## Strombidiidae 1.52595
## MAST-12A 1.51207
## Strombidium 1.49981
## Dino-Group-I-Clade-4 1.47805
## Ostreococcus 1.47145
##
## Eigenvector centrality (normalized):
##
## Strombidiidae 1.00000
## MAST-12A 0.97247
## Teleaulax 0.95853
## Ostreococcus 0.94598
## Strombidium 0.94585
pdf(paste0(wdir, "fig/network-pearson-genus-18s.pdf"), width = 30, height = 15)
p <- plot(props_pears_genus,
edgeInvisFilter = "threshold",
edgeInvisPar = 0.5,
nodeColor = "cluster",
colorVec = c("#009960", "#CC79A7", "#357090", "#804000"),
nodeSize = "eigenvector",
repulsion = 0.8,
rmSingles = TRUE,
labelScale = FALSE,
cexLabels = 1.6,
nodeSizeSpread = 3,
cexNodes = 2,
hubBorderCol = "darkgray",
#title1 = "Network on genus level with Pearson correlations",
showTitle = FALSE,
#cexTitle = 2.3)
posCol = "#0072B2",
negCol = '#e41a1c')
legend(0.75, 1.1, cex = 2.2, title = "estimated association:",
legend = c("+","-"), lty = 1, lwd = 3, col = c("#0072B2", '#e41a1c'),
bty = "n", horiz = FALSE)
dev.off()
## png
## 2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
## [1] 0.6300445
knitr::kable(as.matrix(sort(colSums(net_pears_genus$normCounts1), decreasing = TRUE)[1:20]))
| Picochlorum | 126.1917044 |
| Tetraselmis | 88.5317644 |
| Nannochloris | 83.3043019 |
| Chlamydomonas | 54.2694073 |
| Rimostrombidium | 36.6920230 |
| Chlorodendrales | 34.1162137 |
| Aspidisca | 24.2932602 |
| Chlamydomonadales | 19.7186379 |
| Aplanochytrium | 16.3574961 |
| Chlamydodon | 12.9062433 |
| Paraphysomonas | 11.4957146 |
| Heterocapsa | 3.5317635 |
| Lepidodinium | 0.9101461 |
| Vannella | -2.1349854 |
| Euplotes | -2.5459088 |
| Bicoecea | -2.9654446 |
| Novel-Gran-2 | -3.4083407 |
| Philasterida | -6.0462936 |
| Caecitellus | -6.6927218 |
| Kryptoperidinium | -7.9204505 |
#lightVSocean: 8
my.genus.1 <- res_sig.aldex.lightVSocean %>% dplyr::select(Genus) %>% as.data.frame()
my.genus.1$lightVSocean <- 1
#lightVSdark: 15
my.genus.2 <- tax_table(biom.18s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1.5,] %>% row.names(),] %>% as.data.frame() %>% dplyr::select(Genus)
my.genus.2$lightVSdark <- 1
#light.time.trend: 11
my.genus.3 <- dge.4.u %>% dplyr::select(Genus) %>% as.data.frame
my.genus.3$light.time.trend <- 1
#merge
my.genus <- merge(x = my.genus.1, y = my.genus.2, by = "Genus", all = TRUE)
my.genus <- merge(x = my.genus, y = my.genus.3, by = "Genus", all = TRUE)
biom.18s.genus.1 <- biom.18s.genus
sample_data(biom.18s.genus.1) <- sample_data(biom.18s.genus) %>% as.data.frame() %>% as.matrix() %>% as.data.frame() %>% unite(culture_setup_time, c("Culture_setup", "Time_point"))
temp.genus <- merge_samples(biom.18s.genus.1, "culture_setup_time", fun=sum) %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt()
temp.abund <- data.frame()
for(i in 1:length(my.genus$Genus)) {
temp.out <- temp.genus %>% dplyr::filter(Genus %in% my.genus$Genus[i]) %>% dplyr::select(Sample,Abundance,Genus)
temp.abund <- rbind(temp.abund, temp.out)
}
my.genus.abund <- reshape2::acast(temp.abund, Genus~Sample, value.var = "Abundance")[,c(11,6,7,4,5,9,10,8,2,3,1)] %>% as.data.frame() %>% tibble::rownames_to_column(var = "Genus")
my.genus.light.18s <- merge(x = my.genus, y = my.genus.abund, by = "Genus", all = TRUE)
knitr::kable(as.matrix(my.genus.light.18s))
| Genus | lightVSocean | lightVSdark | light.time.trend | ocean_0 | light+clean_gas_5 | light+clean_gas_7 | light+clean_gas_11 | light+clean_gas_14 | light+used_gas_5 | light+used_gas_7 | light+used_gas_11 | dark+clean_gas_5 | dark+clean_gas_7 | dark+clean_gas_11 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Catena | NA | NA | 1 | 0.000000e+00 | 0.0003379009 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0001782844 | 0.0000000000 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 5.657273e-05 |
| Chlorella | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 1.149772e-03 | 0.0000000000 | 0.0000000000 | 7.947646e-05 |
| Chlorellales_XX | 1 | NA | NA | 7.063648e-04 | 0.0046902999 | 5.041327e-04 | 4.516628e-04 | 0.0005916876 | 0.0046259276 | 0.0005658150 | 4.188787e-04 | 0.0000000000 | 0.0000000000 | 4.898143e-04 |
| Chlorodendrales_XX | 1 | 1 | NA | 1.392823e-02 | 0.0049646976 | 1.895699e-02 | 1.494706e-02 | 0.0051025601 | 0.0053314150 | 0.0211667699 | 4.223737e-02 | 0.0016048494 | 0.0007600987 | 4.753357e-03 |
| Dunaliella | 1 | NA | 1 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 1.751160e-04 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 5.038340e-05 | 0.0000000000 | 0.0000000000 | 0.000000e+00 |
| Moneuplotes | NA | NA | 1 | 0.000000e+00 | 0.0004341113 | 2.292957e-04 | 4.912807e-04 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 1.708029e-04 | 0.0000000000 | 0.0009557372 | 8.791467e-05 |
| Nannochloris | 1 | NA | 1 | 2.828345e-02 | 0.0283297264 | 5.110312e-02 | 1.023234e-01 | 0.0426235538 | 0.0794856715 | 0.1836846907 | 2.750722e-01 | 0.0056415730 | 0.0058656923 | 1.059089e-01 |
| Nannochloropsis | 1 | NA | 1 | 0.000000e+00 | 0.0002302709 | 4.918582e-04 | 0.000000e+00 | 0.0011175651 | 0.0002165651 | 0.0000000000 | 4.838714e-05 | 0.0000000000 | 0.0000000000 | 4.529654e-04 |
| Picochlorum | 1 | NA | 1 | 1.217472e-01 | 0.2477550416 | 4.788249e-01 | 3.186011e-01 | 0.6424564080 | 0.4252928376 | 0.5368042925 | 3.380542e-01 | 0.0141721002 | 0.0304777948 | 3.292703e-01 |
| Scenedesmus | NA | NA | 1 | 6.565928e-05 | 0.0012813198 | 5.262908e-04 | 7.129171e-04 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 3.488916e-04 | 0.0000000000 | 0.0000000000 | 3.452883e-04 |
| Stichococcus | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 3.358388e-05 | 7.903332e-05 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.000000e+00 |
| Tetraselmis | 1 | 1 | 1 | 2.983400e-02 | 0.0578165933 | 2.160364e-01 | 4.382103e-02 | 0.2877727088 | 0.0579401149 | 0.1589012787 | 1.520716e-01 | 0.0028844458 | 0.0107986014 | 4.702055e-02 |
| Tisochrysis | 1 | 1 | NA | 0.000000e+00 | 0.0000000000 | 4.610344e-04 | 7.707019e-05 | 0.0008392871 | 0.0000000000 | 0.0001981126 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.000000e+00 |
| Uronema | NA | NA | 1 | 8.885796e-05 | 0.0003462671 | 4.739865e-04 | 4.149361e-03 | 0.0065497112 | 0.0005253322 | 0.0000000000 | 0.000000e+00 | 0.0001357417 | 0.0004207827 | 6.370553e-05 |
#darkVSocean: 7
my.genus.4 <- res_sig.aldex.darkVSocean %>% dplyr::select(Genus) %>% as.data.frame()
my.genus.4$darkVSocean <- 1
#darkVSlight: 24
my.genus.5 <- tax_table(biom.18s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1.5,] %>% row.names(),] %>% as.data.frame() %>% dplyr::select(Genus)
my.genus.5$darkVSlight <- 1
#dark.time.trend: 25
my.genus.6 <- dge.5.u %>% dplyr::select(Genus) %>% as.data.frame
my.genus.6$dark.time.trend <- 1
#merge
my.genus <- merge(x = my.genus.4, y = my.genus.5, by = "Genus", all = TRUE)
my.genus <- merge(x = my.genus, y = my.genus.6, by = "Genus", all = TRUE)
temp.abund <- data.frame()
for(i in 1:length(my.genus$Genus)) {
temp.out <- temp.genus %>% dplyr::filter(Genus %in% my.genus$Genus[i]) %>% dplyr::select(Sample,Abundance,Genus)
temp.abund <- rbind(temp.abund, temp.out)
}
my.genus.abund <- reshape2::acast(temp.abund, Genus~Sample, value.var = "Abundance")[,c(11,6,7,4,5,9,10,8,2,3,1)] %>% as.data.frame() %>% tibble::rownames_to_column(var = "Genus")
my.genus.dark.18s <- merge(x = my.genus, y = my.genus.abund, by = "Genus", all = TRUE)
knitr::kable(as.matrix(my.genus.dark.18s))
| Genus | darkVSocean | darkVSlight | dark.time.trend | ocean_0 | light+clean_gas_5 | light+clean_gas_7 | light+clean_gas_11 | light+clean_gas_14 | light+used_gas_5 | light+used_gas_7 | light+used_gas_11 | dark+clean_gas_5 | dark+clean_gas_7 | dark+clean_gas_11 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Aplanochytrium | NA | NA | 1 | 3.383009e-03 | 0.0413115407 | 2.956411e-02 | 1.131662e-03 | 0.0004549487 | 0.0133246541 | 0.0062936916 | 2.219848e-03 | 1.997691e-03 | 0.0066310122 | 7.312849e-03 |
| Aspidisca | 1 | 1 | NA | 7.740166e-03 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0015227940 | 0.0000000000 | 0.000000e+00 | 2.018106e-01 | 0.2277381156 | 1.669369e-01 |
| Chlamydomonadales_XX | NA | NA | 1 | 2.390505e-03 | 0.0060157993 | 3.394457e-03 | 5.491708e-03 | 0.0000000000 | 0.0269921719 | 0.0458881429 | 1.608431e-01 | 7.408895e-04 | 0.0022946003 | 1.981292e-02 |
| Chlamydomonas | NA | NA | 1 | 1.207866e-02 | 0.0716771601 | 1.560514e-01 | 4.863801e-01 | 0.0001052672 | 0.0110994902 | 0.0141732120 | 1.174826e-02 | 3.676280e-03 | 0.0151075989 | 1.132940e-01 |
| Chlorella | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 1.149772e-03 | 0.000000e+00 | 0.0000000000 | 7.947646e-05 |
| Chlorococcum | NA | NA | 1 | 1.411588e-04 | 0.0172536419 | 2.918188e-03 | 1.953060e-03 | 0.0000000000 | 0.0010855725 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0012145155 | 5.147376e-03 |
| Cochliopodium | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0001271934 | 0.0000000000 | 0.000000e+00 | 7.197856e-05 | 0.0003639988 | 1.785318e-04 |
| Dunaliella | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 1.751160e-04 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 5.038340e-05 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 |
| Mayorella | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 3.173417e-04 |
| Ministeria | NA | 1 | 1 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0001608826 | 2.972564e-03 |
| Mychonastes | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 1.026474e-03 | 0.000000e+00 | 0.0013384336 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 |
| Nannochloris | NA | NA | 1 | 2.828345e-02 | 0.0283297264 | 5.110312e-02 | 1.023234e-01 | 0.0426235538 | 0.0794856715 | 0.1836846907 | 2.750722e-01 | 5.641573e-03 | 0.0058656923 | 1.059089e-01 |
| Nannochloropsis | NA | NA | 1 | 0.000000e+00 | 0.0002302709 | 4.918582e-04 | 0.000000e+00 | 0.0011175651 | 0.0002165651 | 0.0000000000 | 4.838714e-05 | 0.000000e+00 | 0.0000000000 | 4.529654e-04 |
| Novel-Gran-1_X | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 7.508567e-05 | 0.0001195329 | 4.704850e-04 |
| Novel-Gran-2_X | 1 | 1 | 1 | 1.004975e-03 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 2.963091e-03 | 0.0397310904 | 4.516295e-02 |
| Novel-Gran-6_X | 1 | 1 | 1 | 3.152808e-04 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 4.098540e-04 | 0.0008177671 | 2.654930e-03 |
| Ochromonadales_XX | 1 | 1 | NA | 1.489642e-03 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 1.907350e-03 | 0.0525614854 | 1.675446e-03 |
| Paraflabellula | NA | 1 | NA | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0006258056 | 5.711322e-05 |
| Paramoeba | 1 | 1 | NA | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0012742638 | 3.227980e-04 |
| Paraphysomonas | NA | NA | 1 | 1.877638e-02 | 0.0042827382 | 1.063238e-04 | 1.541404e-04 | 0.0000000000 | 0.0025345895 | 0.0000000000 | 2.003796e-04 | 3.085954e-03 | 0.0229186572 | 7.476929e-02 |
| Pedospumella | 1 | 1 | NA | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0010477237 | 0.000000e+00 |
| Perkinsida_XXX | NA | NA | 1 | 5.582823e-04 | 0.0048992100 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 1.201850e-04 | 0.0001778177 | 8.858099e-03 |
| Picochlorum | NA | NA | 1 | 1.217472e-01 | 0.2477550416 | 4.788249e-01 | 3.186011e-01 | 0.6424564080 | 0.4252928376 | 0.5368042925 | 3.380542e-01 | 1.417210e-02 | 0.0304777948 | 3.292703e-01 |
| Prorocentrum | NA | 1 | 1 | 6.040541e-04 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 1.052576e-04 | 0.0000000000 | 2.182699e-04 |
| Pseudovorticella | NA | NA | 1 | 1.233169e-04 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 6.650289e-05 | 0.0000000000 | 1.061887e-04 |
| Scenedesmus | NA | NA | 1 | 6.565928e-05 | 0.0012813198 | 5.262908e-04 | 7.129171e-04 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 3.488916e-04 | 0.000000e+00 | 0.0000000000 | 3.452883e-04 |
| Sessilida_X | NA | 1 | NA | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 3.143181e-04 | 0.0008128807 | 1.588315e-03 |
| Spongiococcum | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 | 1.363091e-04 | 0.0000000000 | 0.0009046367 | 0.0007896674 | 3.338583e-04 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 |
| Spumella | 1 | 1 | NA | 1.752399e-03 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 6.101571e-03 | 0.2763408546 | 1.882008e-03 |
| Stichococcus | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 3.358388e-05 | 7.903332e-05 | 0.0000000000 | 0.0000000000 | 0.0000000000 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 |
| Tetraselmis | NA | NA | 1 | 2.983400e-02 | 0.0578165933 | 2.160364e-01 | 4.382103e-02 | 0.2877727088 | 0.0579401149 | 0.1589012787 | 1.520716e-01 | 2.884446e-03 | 0.0107986014 | 4.702055e-02 |
| Tisochrysis | NA | NA | 1 | 0.000000e+00 | 0.0000000000 | 4.610344e-04 | 7.707019e-05 | 0.0008392871 | 0.0000000000 | 0.0001981126 | 0.000000e+00 | 0.000000e+00 | 0.0000000000 | 0.000000e+00 |
biom.18s.genus.1 <- biom.18s.genus
sample_data(biom.18s.genus.1) <- sample_data(biom.18s.genus) %>% as.data.frame() %>% as.matrix() %>% as.data.frame() %>% unite(culture_setup_time, c("Culture_setup", "Time_point"))
temp.genus <- merge_samples(biom.18s.genus.1, "culture_setup_time", fun=sum) %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt()
temp.abund.all <- temp.genus %>% dplyr::select(Sample,Abundance,Genus)
rel.abund.18s <- reshape2::acast(temp.abund.all, Genus~Sample, value.var = "Abundance")
write.table(as.matrix(rel.abund.18s), file = paste0(wdir, "tab-rel-abund-culture-time-18s.csv"), col.names = T, row.names = T, sep = ",", quote = F)
#save workspace
save.image(paste0(wdir, "/phyloseq-18s.Rdata"))
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Copenhagen
## tzcode source: system (glibc)
##
## attached base packages:
## [1] splines stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] NetCoMi_1.1.0 SpiecEasi_1.1.3
## [3] DirichletMultinomial_1.46.0 ALDEx2_1.36.0
## [5] latticeExtra_0.6-30 zCompositions_1.5.0-3
## [7] truncnorm_1.0-9 NADA_1.6-1.1
## [9] survival_3.6-4 MASS_7.3-60.2
## [11] microbiome_1.26.0 factoextra_1.0.7
## [13] FactoMineR_2.11 ggbiplot_0.6.2
## [15] knitr_1.47 ggpubr_0.6.0
## [17] patchwork_1.2.0 gplots_3.1.3.1
## [19] edgeR_4.2.0 limma_3.60.2
## [21] DESeq2_1.44.0 SummarizedExperiment_1.34.0
## [23] Biobase_2.64.0 MatrixGenerics_1.16.0
## [25] matrixStats_1.3.0 GenomicRanges_1.56.0
## [27] GenomeInfoDb_1.40.1 IRanges_2.38.0
## [29] S4Vectors_0.42.0 BiocGenerics_0.50.0
## [31] vegan_2.6-6.1 lattice_0.22-6
## [33] permute_0.9-7 ggrepel_0.9.5
## [35] RColorBrewer_1.1-3 psadd_0.1.3
## [37] reshape2_1.4.4 phyloseq_1.48.0
## [39] lubridate_1.9.3 forcats_1.0.0
## [41] stringr_1.5.1 dplyr_1.1.4
## [43] purrr_1.0.2 readr_2.1.5
## [45] tidyr_1.3.1 tibble_3.2.1
## [47] tidyverse_2.0.0 ggplot2_3.5.1
##
## loaded via a namespace (and not attached):
## [1] nnet_7.3-19 DT_0.33 Biostrings_2.72.1
## [4] TH.data_1.1-2 vctrs_0.6.5 digest_0.6.35
## [7] png_0.1-8 corpcor_1.6.10 shape_1.4.6.1
## [10] pcaPP_2.0-4 registry_0.5-1 corrplot_0.92
## [13] deldir_2.0-4 parallelly_1.37.1 SPRING_1.0.4
## [16] foreach_1.5.2 withr_3.0.0 psych_2.4.3
## [19] xfun_0.44 doRNG_1.8.6 memoise_2.0.1
## [22] emmeans_1.10.2 latentcor_2.0.1 zoo_1.8-12
## [25] gtools_3.9.5 pbapply_1.7-2 Formula_1.2-5
## [28] KEGGREST_1.44.0 scatterplot3d_0.3-44 httr_1.4.7
## [31] rstatix_0.7.2 globals_0.16.3 rhdf5filters_1.16.0
## [34] rhdf5_2.48.0 rstudioapi_0.16.0 UCSC.utils_1.0.0
## [37] generics_0.1.3 base64enc_0.1-3 zlibbioc_1.50.0
## [40] ca_0.71.1 doSNOW_1.0.20 RcppZiggurat_0.1.6
## [43] GenomeInfoDbData_1.2.12 quadprog_1.5-8 SparseArray_1.4.8
## [46] xtable_1.8-4 ade4_1.7-22 doParallel_1.0.17
## [49] evaluate_0.23 S4Arrays_1.4.1 Rfast_2.1.0
## [52] preprocessCore_1.66.0 hms_1.1.3 glmnet_4.1-8
## [55] pulsar_0.3.11 irlba_2.3.5.1 colorspace_2.1-0
## [58] magrittr_2.0.3 viridis_0.6.5 future.apply_1.11.2
## [61] cowplot_1.1.3 Hmisc_5.1-3 pillar_1.9.0
## [64] nlme_3.1-164 huge_1.3.5 iterators_1.0.14
## [67] caTools_1.18.2 compiler_4.4.1 stringi_1.8.4
## [70] biomformat_1.32.0 TSP_1.2-4 dendextend_1.17.1
## [73] plyr_1.8.9 crayon_1.5.2 abind_1.4-5
## [76] timeSeries_4032.109 sn_2.1.1 locfit_1.5-9.9
## [79] bit_4.0.5 rootSolve_1.8.2.4 mixedCCA_1.6.2
## [82] sandwich_3.1-0 fastcluster_1.2.6 codetools_0.2-20
## [85] multcomp_1.4-25 directlabels_2024.1.21 bslib_0.7.0
## [88] plotly_4.10.4 multtest_2.60.0 Rcpp_1.0.12
## [91] qgraph_1.9.8 magic_1.6-1 interp_1.1-6
## [94] leaps_3.1 blob_1.2.4 utf8_1.2.4
## [97] fBasics_4032.96 pbivnorm_0.6.0 listenv_0.9.1
## [100] checkmate_2.3.1 Rdpack_2.6 ggsignif_0.6.4
## [103] estimability_1.5.1 lavaan_0.6-17 Matrix_1.7-0
## [106] statmod_1.5.0 tzdb_0.4.0 pkgconfig_2.0.3
## [109] tools_4.4.1 cachem_1.1.0 rbibutils_2.2.16
## [112] RSQLite_2.3.7 viridisLite_0.4.2 DBI_1.2.3
## [115] numDeriv_2016.8-1.1 impute_1.78.0 fastmap_1.2.0
## [118] rmarkdown_2.27 scales_1.3.0 grid_4.4.1
## [121] fMultivar_4031.84 broom_1.0.6 sass_0.4.9
## [124] coda_0.19-4.1 carData_3.0-5 rpart_4.1.23
## [127] snow_0.4-4 farver_2.1.2 mgcv_1.9-1
## [130] yaml_2.3.8 VGAM_1.1-11 spatial_7.3-17
## [133] foreign_0.8-86 cli_3.6.2 webshot_0.5.5
## [136] lifecycle_1.0.4 mvtnorm_1.2-5 backports_1.5.0
## [139] BiocParallel_1.38.0 timechange_0.3.0 gtable_0.3.5
## [142] parallel_4.4.1 ape_5.8 jsonlite_1.8.8
## [145] seriation_1.5.5 bitops_1.0-7 multcompView_0.1-10
## [148] bit64_4.0.5 assertthat_0.2.1 Rtsne_0.17
## [151] glasso_1.11 doFuture_1.0.1 heatmaply_1.5.0
## [154] RcppParallel_5.1.7 jquerylib_0.1.4 highr_0.11
## [157] timeDate_4032.109 orca_1.1-2 lazyeval_0.2.2
## [160] dynamicTreeCut_1.63-1 htmltools_0.5.8.1 GO.db_3.19.1
## [163] glue_1.7.0 XVector_0.44.0 microbenchmark_1.4.10
## [166] mnormt_2.1.1 jpeg_0.1-10 gridExtra_2.3
## [169] flashClust_1.01-2 igraph_2.0.3 R6_2.5.1
## [172] fdrtool_1.2.17 labeling_0.4.3 cluster_2.1.6
## [175] rngtools_1.5.2 Rhdf5lib_1.26.0 DelayedArray_0.30.1
## [178] tidyselect_1.2.1 plotrix_3.8-4 WGCNA_1.72-5
## [181] htmlTable_2.4.2 car_3.1-2 AnnotationDbi_1.66.0
## [184] future_1.33.2 filematrix_1.3 munsell_0.5.1
## [187] KernSmooth_2.23-24 geometry_0.4.7 data.table_1.15.4
## [190] htmlwidgets_1.6.4 rlang_1.1.3 fansi_1.0.6